Révision 3055
tmp/org.txm.concordance.rcp/src/org/txm/concordance/rcp/editors/ConcordanceEditor.java (revision 3055) | ||
---|---|---|
1902 | 1902 |
* Refresh reference column title. |
1903 | 1903 |
*/ |
1904 | 1904 |
public void refreshReferenceColumnTitle() { |
1905 |
|
|
1906 |
referenceColumn.setText(concordance.getRefViewPattern().getTitle()); |
|
1905 |
if (concordance.getRefViewPattern() != null) { |
|
1906 |
referenceColumn.setText(concordance.getRefViewPattern().getTitle()); |
|
1907 |
} |
|
1908 |
else { |
|
1909 |
referenceColumn.setText("References"); |
|
1910 |
} |
|
1907 | 1911 |
} |
1908 | 1912 |
|
1909 | 1913 |
/** |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/StructuralUnitProperty.java (revision 3055) | ||
---|---|---|
39 | 39 |
import org.txm.searchengine.cqp.clientExceptions.CqiClientException; |
40 | 40 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
41 | 41 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
42 |
import org.txm.searchengine.cqp.corpus.query.MatchUtils; |
|
42 | 43 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
43 | 44 |
import org.txm.utils.logger.Log; |
44 | 45 |
|
... | ... | |
301 | 302 |
|
302 | 303 |
return null; |
303 | 304 |
} |
304 |
|
|
305 |
|
|
305 | 306 |
public String cpos2Str(int position) throws UnexpectedAnswerException, IOException, CqiServerError { |
306 | 307 |
AbstractCqiClient cqiClient = CorpusManager.getCorpusManager().getCqiClient(); |
307 |
return cqiClient.getSingleData(this, new int[] {position}).get(0);
|
|
308 |
return cqiClient.getSingleData(this, new int[] { position }).get(0);
|
|
308 | 309 |
} |
309 | 310 |
|
310 | 311 |
public String[] cpos2Str(int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError { |
311 | 312 |
AbstractCqiClient cqiClient = CorpusManager.getCorpusManager().getCqiClient(); |
312 | 313 |
return cqiClient.getSingleData(this, positions).toArray(new String[positions.length]); |
313 | 314 |
} |
315 |
|
|
316 |
/** |
|
317 |
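* Very slow: retrieves every value with getValues() just to count them. |

318 |
* |

319 |
* @return the number of values returned by getValues() |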
|
320 |
* @throws IOException |
|
321 |
* @throws CqiServerError |
|
322 |
* @throws CqiClientException |
|
323 |
*/ |
|
324 |
public int getNValues() throws IOException, CqiServerError, CqiClientException { |
|
325 |
return getValues().size(); |
|
326 |
} |
|
327 |
|
|
328 |
/** |
|
329 |
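* Very slow: retrieves every value with getValues() and copies them into an array. |

330 |
* |

331 |
* @return the values returned by getValues(), as a String array |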
|
332 |
* @throws IOException |
|
333 |
* @throws CqiServerError |
|
334 |
* @throws CqiClientException |
|
335 |
*/ |
|
336 |
public String[] getValuesAsStrings() throws IOException, CqiServerError, CqiClientException { |
|
337 |
List<String> values = getValues(); |
|
338 |
return values.toArray(new String[values.size()]); |
|
339 |
} |
|
314 | 340 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/CQPCorpus.java (revision 3055) | ||
---|---|---|
1484 | 1484 |
|
1485 | 1485 |
return null; |
1486 | 1486 |
} |
1487 |
|
|
1488 |
/** |
|
1489 |
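* Parses a "struct_prop" property name to find the matching StructuralUnitProperty. |

1490 |
* |

1491 |
* @param p the property name, in "struct_prop" form |

1492 |
* @return the matching StructuralUnitProperty, or null if a CqiClientException is raised |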
|
1493 |
*/ |
|
1494 |
public StructuralUnitProperty getStructuralUnitProperty(String p) { |
|
1495 |
|
|
1496 |
try { |
|
1497 |
return StructuralUnitProperty.stringToStructuralUnitProperty(this, p); |
|
1498 |
} |
|
1499 |
catch (CqiClientException e) { |
|
1500 |
e.printStackTrace(); |
|
1501 |
return null; |
|
1502 |
} |
|
1503 |
} |
|
1487 | 1504 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/WordProperty.java (revision 3055) | ||
---|---|---|
3 | 3 |
import java.io.IOException; |
4 | 4 |
import java.util.List; |
5 | 5 |
|
6 |
import org.apache.commons.lang.math.IntRange; |
|
7 |
import org.txm.objects.Match; |
|
6 | 8 |
import org.txm.searchengine.cqp.clientExceptions.UnexpectedAnswerException; |
7 | 9 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery; |
10 |
import org.txm.searchengine.cqp.corpus.query.MatchUtils; |
|
8 | 11 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
9 | 12 |
|
10 | 13 |
/** |
... | ... | |
62 | 65 |
} |
63 | 66 |
|
64 | 67 |
public String cpos2Str(int position) throws UnexpectedAnswerException, IOException, CqiServerError { |
65 |
return CorpusManager.getCorpusManager().getCqiClient().cpos2Str(this.getQualifiedName(), new int[] {position})[0];
|
|
68 |
return CorpusManager.getCorpusManager().getCqiClient().cpos2Str(this.getQualifiedName(), new int[] { position })[0];
|
|
66 | 69 |
} |
67 | 70 |
|
68 | 71 |
public String[] cpos2Str(int[] positions) throws UnexpectedAnswerException, IOException, CqiServerError { |
... | ... | |
82 | 85 |
} |
83 | 86 |
|
84 | 87 |
public int str2Id(String value) throws UnexpectedAnswerException, IOException, CqiServerError { |
85 |
return CorpusManager.getCorpusManager().getCqiClient().str2Id(this.getQualifiedName(), new String[] {value})[0];
|
|
88 |
return CorpusManager.getCorpusManager().getCqiClient().str2Id(this.getQualifiedName(), new String[] { value })[0];
|
|
86 | 89 |
} |
87 | 90 |
|
88 | 91 |
public String getCQLTest(String value) { |
89 |
return this.getName() + "=\"" + CQLQuery.addBackSlash(value)+ "\"";
|
|
92 |
return this.getName() + "=\"" + CQLQuery.addBackSlash(value) + "\"";
|
|
90 | 93 |
} |
91 | 94 |
|
92 | 95 |
public String getCQLTest(List<String> values) { |
93 | 96 |
StringBuilder sb = new StringBuilder(); |
94 | 97 |
sb.append(this.getName() + "=\""); //$NON-NLS-1$ |
95 |
for (int i = 0 ; i < values.size() ; i++) {
|
|
98 |
for (int i = 0; i < values.size(); i++) {
|
|
96 | 99 |
String s = values.get(i); |
97 | 100 |
s = CQLQuery.addBackSlash(s); |
98 |
sb.append(s); //$NON-NLS-1$
|
|
99 |
if (i < values.size() -1) { |
|
101 |
sb.append(s); |
|
102 |
if (i < values.size() - 1) {
|
|
100 | 103 |
sb.append("|"); //$NON-NLS-1$ |
101 | 104 |
} |
102 | 105 |
} |
103 | 106 |
sb.append("\""); |
104 | 107 |
return sb.toString(); |
108 |
} |
|
109 |
|
|
110 |
public String[] getValues() throws UnexpectedAnswerException, IOException, CqiServerError { |
|
111 |
int n = CorpusManager.getCorpusManager().getCqiClient().lexiconSize(this.getQualifiedName()); |
|
112 |
int[] ids = MatchUtils.toPositions(0, n - 1); |
|
113 |
return CorpusManager.getCorpusManager().getCqiClient().id2Str(this.getQualifiedName(), ids); |
|
114 |
} |
|
115 |
|
|
116 |
public int getNValues() throws UnexpectedAnswerException, IOException, CqiServerError { |
|
117 |
return CorpusManager.getCorpusManager().getCqiClient().lexiconSize(this.getQualifiedName()); |
|
105 | 118 |
}; |
106 | 119 |
} |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/query/MatchUtils.java (revision 3055) | ||
---|---|---|
70 | 70 |
* @return array of int position from a Match start to another Match end |
71 | 71 |
*/ |
72 | 72 |
public static int[] toPositions(Match match1, Match match2) { |
73 |
int len = match2.end - match1.start + 1; |
|
73 |
return toPositions(match1.start, match2.end); |
|
74 |
} |
|
75 |
|
|
76 |
/** |
|
77 |
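* Builds the array of consecutive positions from a start position to an end position. |

78 |
* @param from the first position (included) |

79 |
* @param to the last position (included) |

80 |
* @return array of int positions from "from" to "to"; an empty array if "to" is before "from" |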
|
81 |
*/ |
|
82 |
public static int[] toPositions(int from, int to) { |
|
83 |
int len = to - from + 1; |
|
74 | 84 |
|
75 | 85 |
if (len <= 0) { |
76 | 86 |
return new int[0]; |
77 | 87 |
} |
78 | 88 |
else if (len == 1) { |
79 |
return new int[] { match1.start };
|
|
89 |
return new int[] { from };
|
|
80 | 90 |
} |
81 | 91 |
else if (len == 2) { |
82 |
return new int[] { match1.start, match2.end };
|
|
92 |
return new int[] { from, to };
|
|
83 | 93 |
} |
84 | 94 |
else { |
85 | 95 |
int[] positions = new int[len]; |
86 | 96 |
int n = 0; |
87 |
for (int i = match1.start; i <= match2.end; i++) {
|
|
97 |
for (int i = from; i <= to; i++) {
|
|
88 | 98 |
positions[n++] = i; |
89 | 99 |
} |
90 | 100 |
return positions; |
tmp/org.txm.annotation.kr.rcp/src/org/txm/annotation/kr/rcp/commands/RecodeCorpus.java (revision 3055) | ||
---|---|---|
36 | 36 |
import org.txm.utils.logger.Log; |
37 | 37 |
|
38 | 38 |
public class RecodeCorpus extends AbstractHandler { |
39 |
|
|
39 |
|
|
40 | 40 |
public static final String ID = RecodeCorpus.class.getCanonicalName(); |
41 |
|
|
42 |
@Option(name="conversionFile", usage="conversionFile", widget="File", required=true, def="conv.tsv")
|
|
41 |
|
|
42 |
@Option(name = "conversionFile", usage = "conversionFile", widget = "File", required = true, def = "conv.tsv")
|
|
43 | 43 |
protected File conversionFile; |
44 |
@Option(name="oldType", usage="oldType", widget="String", required=true, def="pos") |
|
44 |
|
|
45 |
@Option(name = "oldType", usage = "oldType", widget = "String", required = true, def = "pos") |
|
45 | 46 |
protected String oldType; |
46 |
@Option(name="newType", usage="newType", widget="String", required=true, def="pos2") |
|
47 |
|
|
48 |
@Option(name = "newType", usage = "newType", widget = "String", required = true, def = "pos2") |
|
47 | 49 |
protected String newType; |
48 |
@Option(name="gestionInconnus", usage="oldType", widget="String", required=true, def="abandon") |
|
50 |
|
|
51 |
@Option(name = "gestionInconnus", usage = "how to manage failed conversion", widget = "String", required = true, def = "abandon") |
|
49 | 52 |
protected String gestionInconnus; |
50 |
|
|
53 |
|
|
51 | 54 |
@Override |
52 | 55 |
public Object execute(ExecutionEvent event) throws ExecutionException { |
53 |
|
|
56 |
|
|
54 | 57 |
ISelection isel = HandlerUtil.getCurrentSelection(event); |
55 |
if (isel == null) return null ;
|
|
56 |
if (!(isel instanceof IStructuredSelection)) return null ;
|
|
58 |
if (isel == null) return null; |
|
59 |
if (!(isel instanceof IStructuredSelection)) return null; |
|
57 | 60 |
IStructuredSelection selection = (IStructuredSelection) isel; |
58 | 61 |
Object s = selection.getFirstElement(); |
59 | 62 |
if (!(s instanceof MainCorpus)) return null; |
60 |
|
|
63 |
|
|
61 | 64 |
final MainCorpus corpus = (MainCorpus) s; |
62 |
|
|
65 |
|
|
63 | 66 |
try { |
64 | 67 |
if (ParametersDialog.open(this)) { |
65 |
|
|
66 |
JobHandler job = new JobHandler("Recode corpus "+corpus) { |
|
68 |
|
|
69 |
JobHandler job = new JobHandler("Recode corpus " + corpus) { |
|
70 |
|
|
67 | 71 |
@Override |
68 | 72 |
protected IStatus run(IProgressMonitor monitor) { |
69 | 73 |
try { |
70 | 74 |
recode(corpus, conversionFile, oldType, newType, monitor); |
71 |
} catch (Exception e) { |
|
72 |
System.out.println("Fail to convert properties of corpus "+corpus); |
|
75 |
} |
|
76 |
catch (Exception e) { |
|
77 |
System.out.println("Fail to convert properties of corpus " + corpus); |
|
73 | 78 |
e.printStackTrace(); |
74 | 79 |
return Status.CANCEL_STATUS; |
75 | 80 |
} |
... | ... | |
79 | 84 |
job.startJob(); |
80 | 85 |
return corpus; |
81 | 86 |
} |
82 |
} catch (Exception e) { |
|
87 |
} |
|
88 |
catch (Exception e) { |
|
83 | 89 |
e.printStackTrace(); |
84 | 90 |
} |
85 |
|
|
91 |
|
|
86 | 92 |
return corpus; |
87 | 93 |
} |
88 |
|
|
89 |
public static JobHandler recode(final MainCorpus corpus, CorpusRuledConvertion crc, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, XMLStreamException { |
|
90 |
|
|
94 |
|
|
95 |
public static JobHandler recode(final MainCorpus corpus, CorpusRuledConvertion crc, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, |
|
96 |
XMLStreamException { |
|
97 |
|
|
91 | 98 |
Project project = corpus.getProject(); |
92 | 99 |
if (!"xtz".equals(project.getImportModuleName())) { |
93 |
System.out.println("Corpus "+corpus+" was imported with XTZ import module");
|
|
100 |
System.out.println("Corpus " + corpus + " was not imported with the XTZ import module");
|
|
94 | 101 |
return null; |
95 | 102 |
} |
96 | 103 |
|
97 | 104 |
Property p1 = corpus.getProperty(oldType); |
98 | 105 |
if (p1 == null) { |
99 |
System.out.println("Corpus "+corpus+" has not property "+oldType);
|
|
106 |
System.out.println("Corpus " + corpus + " has not property " + oldType);
|
|
100 | 107 |
return null; |
101 | 108 |
} |
102 |
|
|
103 |
System.out.println("Recoding "+corpus+" property "+oldType+" in "+newType);
|
|
104 |
|
|
109 |
|
|
110 |
System.out.println("Recoding " + corpus + " property " + oldType + " in " + newType);
|
|
111 |
|
|
105 | 112 |
monitor.beginTask("Recoding corpus properties...", 2); |
106 | 113 |
monitor.setTaskName("Recoding XML-TXM files"); |
107 | 114 |
// apply conversion file |
108 | 115 |
if (!crc.process(corpus)) { |
109 |
System.out.println("Failed to edit XML-TXM of the corpus: "+corpus);
|
|
116 |
System.out.println("Failed to edit XML-TXM of the corpus: " + corpus);
|
|
110 | 117 |
return null; |
111 | 118 |
} |
112 | 119 |
monitor.worked(1); |
113 | 120 |
|
114 | 121 |
// update corpus indexes and edition |
115 |
// String txmhome = Toolbox.getTxmHomePath(); |
|
116 |
|
|
122 |
// String txmhome = Toolbox.getTxmHomePath(); |
|
117 | 123 |
|
118 |
project.setDoMultiThread(false); //too soon |
|
124 |
|
|
125 |
project.setDoMultiThread(false); // too soon |
|
119 | 126 |
project.setDoUpdate(true); |
120 |
|
|
121 |
// monitor.setTaskName("Updating corpus");
|
|
122 |
// File scriptDir = new File(txmhome, "scripts/groovy/user/org/txm/scripts/importer/xtz");
|
|
123 |
// File script = new File(scriptDir, "xtzLoader.groovy");
|
|
127 |
|
|
128 |
// monitor.setTaskName("Updating corpus");
|
|
129 |
// File scriptDir = new File(txmhome, "scripts/groovy/user/org/txm/scripts/importer/xtz");
|
|
130 |
// File script = new File(scriptDir, "xtzLoader.groovy");
|
|
124 | 131 |
System.out.println(NLS.bind(KRAnnotationUIMessages.updatingTheP0Corpus, corpus)); |
125 | 132 |
JobHandler ret = ExecuteImportScript.executeScript(project); |
126 | 133 |
Display.getDefault().syncExec(new Runnable() { |
134 |
|
|
127 | 135 |
@Override |
128 |
public void run() {CloseEditorsUsing.corpus(corpus);} |
|
136 |
public void run() { |
|
137 |
CloseEditorsUsing.corpus(corpus); |
|
138 |
} |
|
129 | 139 |
}); |
130 | 140 |
monitor.worked(1); |
131 | 141 |
return ret; |
132 | 142 |
} |
133 |
|
|
134 |
public static JobHandler recode(final MainCorpus corpus, LinkedHashMap<Pattern, String> rules, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, XMLStreamException { |
|
135 |
|
|
143 |
|
|
144 |
public static JobHandler recode(final MainCorpus corpus, LinkedHashMap<Pattern, String> rules, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, |
|
145 |
XMLStreamException { |
|
146 |
|
|
136 | 147 |
// apply conversion file |
137 | 148 |
CorpusRuledConvertion crc = new CorpusRuledConvertion(rules, newType, oldType); |
138 |
return recode(corpus, crc, oldType, newType , monitor);
|
|
149 |
return recode(corpus, crc, oldType, newType, monitor); |
|
139 | 150 |
} |
140 |
|
|
151 |
|
|
141 | 152 |
public static JobHandler recode(final MainCorpus corpus, File conversionFile, String oldType, String newType, IProgressMonitor monitor) throws IOException, CqiClientException, XMLStreamException { |
142 |
System.out.println("Create CorpusRuledConvertion with "+conversionFile);
|
|
153 |
System.out.println("Create CorpusRuledConvertion with " + conversionFile);
|
|
143 | 154 |
// apply conversion file |
144 | 155 |
CorpusRuledConvertion crc = new CorpusRuledConvertion(conversionFile, oldType, newType); |
145 |
|
|
156 |
|
|
146 | 157 |
return recode(corpus, crc, oldType, newType, monitor); |
147 | 158 |
} |
148 |
} |
|
159 |
} |
tmp/org.txm.ocaml.core/META-INF/MANIFEST.MF (revision 3055) | ||
---|---|---|
20 | 20 |
Bundle-Name: Ocaml |
21 | 21 |
Bundle-Version: 2.0 |
22 | 22 |
Bundle-ClassPath: .,lib/ocamlrun-scripting.jar,lib/ocaml-lib-files.jar |
23 |
Require-Bundle: org.txm.core,org.eclipse.core.runtime |
|
23 |
Require-Bundle: org.txm.core;visibility:=reexport, |
|
24 |
org.eclipse.core.runtime;visibility:=reexport |
|
24 | 25 |
Bundle-ManifestVersion: 2 |
25 | 26 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.8 |
26 | 27 |
Bundle-Vendor: Textometrie.org |
tmp/org.txm.groovy.core/src/java/org/txm/groovy/core/GSERunner.java (revision 3055) | ||
---|---|---|
22 | 22 |
|
23 | 23 |
import cern.colt.Arrays; |
24 | 24 |
import groovy.lang.Binding; |
25 |
import groovy.lang.GroovyShell; |
|
25 | 26 |
import groovy.util.GroovyScriptEngine; |
26 | 27 |
import groovy.util.ResourceException; |
27 | 28 |
import groovy.util.ScriptException; |
... | ... | |
128 | 129 |
return defaultGSE; |
129 | 130 |
} |
130 | 131 |
|
132 |
public Object evaluate(String code) { |
|
133 |
return new GroovyShell(this.getGroovyClassLoader(), new Binding()).evaluate(code); |
|
134 |
} |
|
135 |
|
|
131 | 136 |
public static void getDependancies(String bundle, HashSet<String> defaultPlugins) { |
132 | 137 |
// if (defaultPlugins.contains(bundle)) return; |
133 | 138 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperprince/hyperprinceLoader.groovy (revision 3055) | ||
---|---|---|
80 | 80 |
//ANNOTATE |
81 | 81 |
println "-- ANNOTATE - Running NLP tools" |
82 | 82 |
if (annotate) { |
83 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
83 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
84 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
84 | 85 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
85 | 86 |
annotationSuccess = true; |
86 | 87 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/frantext/frantextLoader.groovy (revision 3055) | ||
---|---|---|
156 | 156 |
boolean annotate_status = true; |
157 | 157 |
if (annotate) { |
158 | 158 |
println "-- ANNOTATE - Running NLP tools - $model model" |
159 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
159 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
160 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
160 | 161 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
161 | 162 |
annotate_status = true; |
162 | 163 |
if (project.getCleanAfterBuild()) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xml/xmlLoader.groovy (revision 3055) | ||
---|---|---|
237 | 237 |
boolean annotationSuccess = false; |
238 | 238 |
if (annotate) { |
239 | 239 |
println "-- ANNOTATE - Running NLP tools" |
240 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
240 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
241 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
241 | 242 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
242 | 243 |
annotationSuccess = true |
243 | 244 |
if (project.getCleanAfterBuild()) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/bfm/bfmLoader.groovy (revision 3055) | ||
---|---|---|
136 | 136 |
boolean annotate_status = true; |
137 | 137 |
if (annotate) { |
138 | 138 |
println "-- ANNOTATE - Running NLP tools - $model model" |
139 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
139 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
140 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
140 | 141 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
141 | 142 |
annotate_status = true; |
142 | 143 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/doc/docLoader.groovy (revision 3055) | ||
---|---|---|
257 | 257 |
boolean annotationSuccess = false; |
258 | 258 |
if (annotate) { |
259 | 259 |
println "-- ANNOTATE - Running NLP tools" |
260 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
260 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
261 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
261 | 262 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
262 | 263 |
annotationSuccess = true; |
263 | 264 |
if (project.getCleanAfterBuild()) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/tmx/tmxLoader.groovy (revision 3055) | ||
---|---|---|
87 | 87 |
if (MONITOR != null) MONITOR.worked(20, "ANNOTATE - Running NLP tools"); |
88 | 88 |
if (annotate) { |
89 | 89 |
println "-- ANNOTATE - Running NLP tools" |
90 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
90 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
91 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
91 | 92 |
def mapForTreeTagger = [:] |
92 | 93 |
for (def k : textLangs.keySet()) mapForTreeTagger[k] = textLangs[k].toLowerCase(); |
93 | 94 |
println "TreeTagger models to use per text: $mapForTreeTagger" |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivamailLoader.groovy (revision 3055) | ||
---|---|---|
104 | 104 |
boolean annotationSuccess = false; |
105 | 105 |
if (annotate) { |
106 | 106 |
println "-- ANNOTATE - Running NLP tools" |
107 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
107 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
108 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
108 | 109 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
109 | 110 |
annotationSuccess = true; |
110 | 111 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/factiva/factivaLoader.groovy (revision 3055) | ||
---|---|---|
215 | 215 |
boolean annotationSuccess = false; |
216 | 216 |
if (annotate) { |
217 | 217 |
println "-- ANNOTATE - Running NLP tools" |
218 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
218 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
219 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
219 | 220 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
220 | 221 |
annotationSuccess = true; |
221 | 222 |
if (project.getCleanAfterBuild()) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/transcriber/transcriberLoader.groovy (revision 3055) | ||
---|---|---|
191 | 191 |
boolean annotationSuccess = false; |
192 | 192 |
if (annotate) { |
193 | 193 |
println "-- ANNOTATE - Running NLP tools" |
194 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
194 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
195 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
195 | 196 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
196 | 197 |
annotationSuccess = true; |
197 | 198 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/txt/txtLoader.groovy (revision 3055) | ||
---|---|---|
133 | 133 |
boolean annotationSuccess = true; |
134 | 134 |
if (annotate) { |
135 | 135 |
println "-- ANNOTATE - Running NLP tools" |
136 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
136 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
137 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
137 | 138 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
138 | 139 |
annotationSuccess = true; |
139 | 140 |
if (project.getCleanAfterBuild()) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/alcesteLoader.groovy (revision 3055) | ||
---|---|---|
90 | 90 |
|
91 | 91 |
if (annotate) { |
92 | 92 |
println "-- ANNOTATE - Running NLP tools" |
93 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
93 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
94 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
94 | 95 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
95 | 96 |
annotationSuccess = true; |
96 | 97 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/alceste/importer.groovy (revision 3055) | ||
---|---|---|
47 | 47 |
import java.io.PrintStream; |
48 | 48 |
import java.net.URL; |
49 | 49 |
|
50 |
import org.txm.core.engines.* |
|
50 | 51 |
import org.txm.scripts.filters.CutHeader.*; |
51 | 52 |
import org.txm.scripts.filters.Tokeniser.*; |
52 | 53 |
import org.txm.scripts.filters.FusionHeader.*; |
... | ... | |
108 | 109 |
cpb.tick() |
109 | 110 |
File resultfile = new File(tokenizeDir, f.getName()); |
110 | 111 |
try { |
111 |
def builder = new SimpleTokenizerXml(f, resultfile, lang); |
|
112 |
builder.setStartTag("text") |
|
113 |
if (!builder.process()) { |
|
112 |
def tokenizer = new SimpleTokenizerXml(f, resultfile, lang); |
|
113 |
if (project.getAnnotate()) { // an annotation will be done: does the annotation engine need another tokenizer? |
|
114 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
115 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
116 |
def stringTokenizer = engine.getStringTokenizer(lang) |
|
117 |
if (stringTokenizer != null) { |
|
118 |
tokenizer.setStringTokenizer(stringTokenizer) |
|
119 |
} |
|
120 |
} |
|
121 |
tokenizer.setStartTag("text") |
|
122 |
if (!tokenizer.process()) { |
|
114 | 123 |
println "Failed to tokenize: "+f; |
115 | 124 |
resultfile.delete(); |
116 | 125 |
} |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/importer.groovy (revision 3055) | ||
---|---|---|
51 | 51 |
import org.txm.*; |
52 | 52 |
import org.txm.core.engines.*; |
53 | 53 |
import org.txm.utils.*; |
54 |
// TODO: Auto-generated Javadoc |
|
55 | 54 |
|
56 | 55 |
/** |
57 | 56 |
* The Class importer. |
... | ... | |
66 | 65 |
* @param basename the basename |
67 | 66 |
* @return true, if successful |
68 | 67 |
*/ |
69 |
public static boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename, String lang) |
|
68 |
public static boolean run(File srcDir, File binDir, File txmDir, String encoding, String basename, String lang, def project)
|
|
70 | 69 |
{ |
71 | 70 |
String rootDir = srcDir.getAbsolutePath()+"/" |
72 | 71 |
//cleaning |
... | ... | |
113 | 112 |
File tfile = new File(binDir, "tokenized/"+filename+".xml"); |
114 | 113 |
|
115 | 114 |
SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(pfile, tfile, lang); |
115 |
if (project.getAnnotate()) { // an annotation will be done: does the annotation engine need another tokenizer? |
|
116 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
117 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
118 |
def stringTokenizer = engine.getStringTokenizer(lang) |
|
119 |
if (stringTokenizer != null) { |
|
120 |
tokenizer.setStringTokenizer(stringTokenizer) |
|
121 |
} |
|
122 |
} |
|
116 | 123 |
if (!tokenizer.process()) { |
117 | 124 |
println("Failed to tokenize file: "+pfile); |
118 | 125 |
tfile.delete(); |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/hyperbase/hyperbaseLoader.groovy (revision 3055) | ||
---|---|---|
78 | 78 |
if (MONITOR != null && MONITOR.isCanceled()) { return MONITOR.done(); } |
79 | 79 |
println "-- IMPORTER - Reading source files" |
80 | 80 |
|
81 |
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang))) { |
|
81 |
if (!(new importer().run(srcDir, binDir, txmDir, encoding, basename, lang, project))) {
|
|
82 | 82 |
println "import process stopped"; |
83 | 83 |
return; |
84 | 84 |
} |
... | ... | |
88 | 88 |
println "-- ANNOTATE - Running NLP tools" |
89 | 89 |
boolean annotationSuccess = false; |
90 | 90 |
if (annotate) { |
91 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
91 |
String engineName = project.getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
92 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
92 | 93 |
if (engine.processDirectory(txmDir, binDir, ["lang":model])) { |
93 | 94 |
annotationSuccess = true; |
94 | 95 |
if (project.getCleanAfterBuild()) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/XTZImporter.groovy (revision 3055) | ||
---|---|---|
26 | 26 |
import org.txm.utils.ConsoleProgressBar |
27 | 27 |
import org.txm.utils.io.* |
28 | 28 |
import org.txm.importer.xtz.* |
29 |
import org.txm.core.engines.* |
|
29 | 30 |
|
30 | 31 |
class XTZImporter extends Importer { |
31 | 32 |
|
... | ... | |
381 | 382 |
File infile = f; |
382 | 383 |
File outfile = new File(module.getBinaryDirectory(),"tokenized/"+f.getName()); |
383 | 384 |
SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(infile, outfile, lang) |
385 |
if (module.getProject().getAnnotate()) { // an annotation will be done: does the annotation engine need another tokenizer? |
|
386 |
String engineName = module.getProject().getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
387 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
388 |
def stringTokenizer = engine.getStringTokenizer(lang) |
|
389 |
if (stringTokenizer != null) { |
|
390 |
tokenizer.setStringTokenizer(stringTokenizer) |
|
391 |
} |
|
392 |
} |
|
384 | 393 |
tokenizer.setRetokenize(retokenize) |
385 | 394 |
if (outSideTextTagsRegex != null && outSideTextTagsRegex.trim().length() > 0) { |
386 | 395 |
tokenizer.setOutSideTextTags(outSideTextTagsRegex) |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/importer/xtz/TTAnnotater.groovy (revision 3055) | ||
---|---|---|
28 | 28 |
String corpusname = module.getProject().getName(); |
29 | 29 |
String lang = module.getProject().getLang(); |
30 | 30 |
|
31 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine("TreeTagger") |
|
31 |
String engineName = module.getProject().getImportParameters().node("annotate").get("engine", "TreeTagger") |
|
32 |
def engine = Toolbox.getEngineManager(EngineType.ANNOTATION).getEngine(engineName) |
|
32 | 33 |
if (module.isMultiThread()) { |
33 | 34 |
def hash = [:] |
34 | 35 |
for (File txmFile : inputDirectory.listFiles()) { |
tmp/org.txm.groovy.core/src/groovy/org/txm/scripts/filters/Tokeniser/SimpleTokenizerXml.groovy (revision 3055) | ||
---|---|---|
52 | 52 |
import javax.xml.stream.* |
53 | 53 |
|
54 | 54 |
import org.txm.importer.PersonalNamespaceContext |
55 |
import org.txm.tokenizer.StringTokenizer |
|
56 |
import org.txm.tokenizer.SimpleStringTokenizer |
|
55 | 57 |
import org.txm.tokenizer.TokenizerClasses |
56 | 58 |
|
57 | 59 |
@CompileStatic |
58 | 60 |
public class SimpleTokenizerXml { |
59 | 61 |
|
62 |
StringTokenizer stringTokenizer; |
|
60 | 63 |
boolean retokenize = false |
61 | 64 |
LinkedHashMap<String, String>retokenizedWordProperties = new LinkedHashMap() |
62 | 65 |
|
... | ... | |
67 | 70 |
/** The intraword_tags. */ |
68 | 71 |
String intraword_tags = TokenizerClasses.intraword_tags; |
69 | 72 |
|
70 |
/** The punct_strong. */ |
|
71 |
String punct_strong = TokenizerClasses.punct_strong; |
|
72 |
|
|
73 |
/** The punct_all. */ |
|
74 |
String punct_all = TokenizerClasses.punct_all; |
|
75 |
|
|
76 | 73 |
/** The word_chars. */ |
77 | 74 |
String word_chars = TokenizerClasses.word_chars; |
78 | 75 |
|
79 |
String fclitics = null; // default behavior don't manage clitics |
|
80 |
String pclitics = null; // default behavior don't manage clitics |
|
81 |
|
|
82 | 76 |
/** The outside_text_tags_ignore_content. */ |
83 | 77 |
String note_content = null; |
84 | 78 |
String outside_text_tags_keep_content = null // tag and content NOT removed but not tokenized |
... | ... | |
89 | 83 |
Pattern reg_outside_text_tags; |
90 | 84 |
Pattern reg_startTag; |
91 | 85 |
|
92 |
Pattern reg_punct_other = Pattern.compile("\\p{P}") |
|
93 |
|
|
94 | 86 |
/** The DEBUG. */ |
95 | 87 |
public boolean DEBUG = false; |
96 | 88 |
|
97 |
/** The outfile. */ |
|
89 |
/** The outfile is the result file. */
|
|
98 | 90 |
File outfile; |
99 | 91 |
|
100 | 92 |
/** The infile. */ |
... | ... | |
119 | 111 |
String prefix; |
120 | 112 |
String filename; |
121 | 113 |
|
122 |
def regElision = null; |
|
123 |
def reg3pts = null; |
|
124 |
def regPunct; |
|
125 |
def regFClitics = null ; |
|
126 |
def regPClitics = null ; |
|
127 |
String whitespaces; |
|
128 |
Pattern regWhitespaces; |
|
129 | 114 |
Pattern regLN; |
130 | 115 |
Pattern regCTRL; |
131 |
Pattern regSplitWhiteSpaces; |
|
132 | 116 |
|
133 | 117 |
public SimpleTokenizerXml(File infile, File outfile) { |
134 | 118 |
this(infile, outfile, ""); |
... | ... | |
142 | 126 |
*/ |
143 | 127 |
public SimpleTokenizerXml(File infile, File outfile, String lang) { |
144 | 128 |
this.lang = lang; |
145 |
if (lang != null) |
|
146 |
if (lang.startsWith("en")) { |
|
147 |
fclitics = TokenizerClasses.FClitic_en |
|
148 |
} else if (lang.startsWith("fr")) { |
|
149 |
fclitics = TokenizerClasses.FClitic_fr |
|
150 |
pclitics = TokenizerClasses.PClitic_fr |
|
151 |
} else if (lang.startsWith("gl")) { |
|
152 |
fclitics = TokenizerClasses.FClitic_gl |
|
153 |
} else if (lang.startsWith("it")) { |
|
154 |
pclitics = TokenizerClasses.PClitic_it |
|
155 |
} |
|
129 |
this.stringTokenizer = new SimpleStringTokenizer(lang); |
|
156 | 130 |
|
157 | 131 |
word_tags = TokenizerClasses.word_tags; |
158 | 132 |
reg_word_tags = Pattern.compile(word_tags); |
159 | 133 |
|
160 | 134 |
intraword_tags = TokenizerClasses.intraword_tags; |
161 |
punct_strong = TokenizerClasses.punct_strong; |
|
162 |
punct_all = TokenizerClasses.punct_all; |
|
163 | 135 |
word_chars = TokenizerClasses.word_chars; |
164 | 136 |
|
165 | 137 |
this.outfile = outfile; |
... | ... | |
168 | 140 |
int index = filename.lastIndexOf("."); |
169 | 141 |
if (index > 0) filename = filename.substring(0, index); |
170 | 142 |
|
171 |
String strRegElision = TokenizerClasses.regElision; |
|
172 |
if (strRegElision != null && strRegElision.length() > 0) |
|
173 |
regElision = ~/^([\p{L}-]++${strRegElision}[\p{L}-]++)(.*)$/ // the test must start with the start of string ^ |
|
174 |
reg3pts = ~/^(.*?)(\.\.\.)(.*)$/ |
|
175 |
|
|
176 |
String strRegPunct = TokenizerClasses.regPunct; |
|
177 |
if (strRegPunct != null && strRegPunct.length() > 0) |
|
178 |
regPunct = ~/^(.*?)($strRegPunct)(.*)$/ |
|
179 |
|
|
180 |
if (fclitics != null && fclitics.length() > 0) |
|
181 |
regFClitics = ~/(.+)($fclitics)$/ // the test must end with the end of string $ |
|
182 |
|
|
183 |
if (pclitics != null && pclitics.length() > 0) |
|
184 |
regPClitics = /^($pclitics)(.*)/ // the test must start with the start of string ^ |
|
185 |
|
|
186 |
if (TokenizerClasses.whitespaces != null && TokenizerClasses.whitespaces.length() > 0) |
|
187 |
regWhitespaces = ~TokenizerClasses.whitespaces; |
|
188 |
|
|
189 |
if (TokenizerClasses.whitespaces != null && TokenizerClasses.whitespaces.length() > 0) |
|
190 |
regSplitWhiteSpaces = Pattern.compile(TokenizerClasses.whitespaces); |
|
191 |
|
|
192 | 143 |
regLN = Pattern.compile("/\n/"); |
193 | 144 |
regCTRL = Pattern.compile("/\\p{C}/"); |
194 | 145 |
} |
195 | 146 |
|
196 | 147 |
/** |
148 |
* Replace the default SimpleStringTokenizer with another |
|
149 |
* @param stringTokenizer a StringTokenizer |
|
150 |
*/ |
|
151 |
public void seStringTokenizer(StringTokenizer stringTokenizer) { |
|
152 |
if (stringTokenizer == null) return; |
|
153 |
this.stringTokenizer = stringTokenizer; |
|
154 |
} |
|
155 |
|
|
156 |
/** |
|
197 | 157 |
* Fill infos. |
198 | 158 |
* |
199 | 159 |
* @param event the event |
... | ... | |
545 | 505 |
//if (DEBUG) println "-- chars: "+text+"--"; |
546 | 506 |
text = regLN.matcher(text).replaceAll(WHITESPACE); |
547 | 507 |
text = regCTRL.matcher(text).replaceAll(EMPTY); // remove ctrl characters |
548 |
if (regSplitWhiteSpaces != null) { |
|
549 |
for (String s : regSplitWhiteSpaces.split(text)) { // separate with unicode white spaces |
|
550 |
// if (DEBUG){println "process $s"} |
|
551 |
iterate(s); |
|
552 |
} |
|
553 |
} else { |
|
554 |
iterate(text); |
|
555 |
} |
|
556 |
} |
|
557 |
|
|
558 |
/** |
|
559 |
* Iterate. a String, should be called when a word is found in a String |
|
560 |
* |
|
561 |
* @param s the s |
|
562 |
* @return the java.lang. object |
|
563 |
*/ |
|
564 |
protected iterate(String s) { |
|
565 |
while (s != null && s.length() > 0) { |
|
566 |
// if (DEBUG){println " > $s"} |
|
567 |
s = standardChecks(s); |
|
568 |
} |
|
569 |
} |
|
570 |
|
|
571 |
/** |
|
572 |
* Standard checks. |
|
573 |
* |
|
574 |
* @param s the s |
|
575 |
* @return the java.lang. object |
|
576 |
*/ |
|
577 |
// @CompileStatic(SKIP) |
|
578 |
public String standardChecks(String s) { |
|
579 |
Matcher m; |
|
580 |
if (fclitics != null && (m = s =~ regFClitics) ) { |
|
581 |
// if (DEBUG) println "CLITIC found: $s ->"+ m |
|
582 |
iterate(m.group(1)) |
|
583 |
|
|
508 |
|
|
509 |
def words = stringTokenizer.processText(text); |
|
510 |
for (def word : words) { |
|
584 | 511 |
wordcount++; |
585 | 512 |
writer.writeStartElement(word_element_to_create); |
586 | 513 |
writeWordAttributes();// id |
587 |
//writer.writeAttribute("type", "w"); |
|
588 |
writer.writeCharacters(m.group(2)); |
|
514 |
writer.writeCharacters(word); |
|
589 | 515 |
writer.writeEndElement(); |
590 | 516 |
writer.writeCharacters("\n"); |
591 |
|
|
592 |
return ""; |
|
593 |
} else if (pclitics != null && (m = s =~ regPClitics) ) { |
|
594 |
if (DEBUG) println "PCLITIC found: $s ->"+ m |
|
595 |
|
|
596 |
wordcount++; |
|
597 |
writer.writeStartElement(word_element_to_create); |
|
598 |
writeWordAttributes();// id |
|
599 |
//writer.writeAttribute("type", "w"); |
|
600 |
writer.writeCharacters(m.group(1)); |
|
601 |
writer.writeEndElement(); |
|
602 |
writer.writeCharacters("\n"); |
|
603 |
|
|
604 |
iterate(m.group(2)) |
|
605 |
|
|
606 |
return ""; |
|
607 |
} else if (regElision != null && (m = s =~ regElision) ) { |
|
608 |
// if (DEBUG) println "Elision found: $s ->"+ m.group(1)+" + "+m.group(2)+" + "+m.group(3) |
|
609 |
//iterate(m.group(1)) |
|
610 |
|
|
611 |
// int sep = s.indexOf("'"); |
|
612 |
// if (sep < 0) |
|
613 |
// sep = s.indexOf("’"); |
|
614 |
// if (sep < 0) |
|
615 |
// sep = s.indexOf("‘"); |
|
616 |
|
|
617 |
wordcount++; |
|
618 |
writer.writeStartElement(word_element_to_create); |
|
619 |
writeWordAttributes();// id |
|
620 |
//writer.writeAttribute("type", "w"); |
|
621 |
writer.writeCharacters(m.group(1)); |
|
622 |
writer.writeEndElement(); |
|
623 |
writer.writeCharacters("\n"); |
|
624 |
|
|
625 |
iterate(m.group(2)) |
|
626 |
|
|
627 |
return ""; |
|
628 |
} else if (reg3pts != null && (m = s =~ reg3pts) ) { |
|
629 |
// if(DEBUG){println "REG '...' found: $s -> "+m.group(1)+" + "+m.group(2)+" + "+m.group(3)} |
|
630 |
iterate(m.group(1)) |
|
631 |
|
|
632 |
wordcount++; |
|
633 |
writer.writeStartElement(word_element_to_create); |
|
634 |
writeWordAttributes();// id |
|
635 |
//writer.writeAttribute("type","pon"); |
|
636 |
writer.writeCharacters("..."); |
|
637 |
writer.writeEndElement(); |
|
638 |
writer.writeCharacters("\n"); |
|
639 |
|
|
640 |
return m.group(3); |
|
641 |
} else if (regPunct != null && (m = s =~ regPunct) ) { |
|
642 |
if(DEBUG){println "PUNCT found: $s ->"+m.group(1)+" + "+m.group(2)+" + "+m.group(3)} |
|
643 |
iterate(m.group(1)); |
|
644 |
|
|
645 |
wordcount++; |
|
646 |
writer.writeStartElement(word_element_to_create); |
|
647 |
writeWordAttributes();// id |
|
648 |
//writer.writeAttribute("type","pon"); |
|
649 |
writer.writeCharacters(m.group(2)); |
|
650 |
writer.writeEndElement(); |
|
651 |
writer.writeCharacters("\n"); |
|
652 |
|
|
653 |
return m.group(3); |
|
654 |
} else { |
|
655 |
// if(DEBUG){println "Other found: "+s} |
|
656 |
wordcount++; |
|
657 |
writer.writeStartElement(word_element_to_create); |
|
658 |
writeWordAttributes();// id |
|
659 |
// if (reg_punct_other.matcher(s).matches()) |
|
660 |
// writer.writeAttribute("type","pon"); |
|
661 |
// else |
|
662 |
// writer.writeAttribute("type","w"); |
|
663 |
writer.writeCharacters(s); |
|
664 |
writer.writeEndElement(); |
|
665 |
writer.writeCharacters("\n"); |
|
666 |
|
|
667 |
return ""; |
|
668 | 517 |
} |
669 | 518 |
} |
670 |
|
|
519 |
|
|
671 | 520 |
/** |
672 | 521 |
* Write word attributes. |
673 | 522 |
* |
... | ... | |
783 | 632 |
// println "Done" |
784 | 633 |
|
785 | 634 |
String lang = "fr" |
786 |
File inFile = new File(System.getProperty("user.home"), "xml/vocapia/test.trs")
|
|
787 |
File outFile = new File(System.getProperty("user.home"), "xml/vocapia/test-retokenized.trs")
|
|
635 |
File inFile = new File(System.getProperty("user.home"), "SVN/TXMSVN/trunk/corpora/tokenizer/test1.xml")
|
|
636 |
File outFile = new File(System.getProperty("user.home"), "SVN/TXMSVN/trunk/corpora/tokenizer/test1-tmp.xml")
|
|
788 | 637 |
|
789 | 638 |
println "processing "+inFile |
790 | 639 |
|
791 | 640 |
SimpleTokenizerXml tokenizer = new SimpleTokenizerXml(inFile, outFile, lang) |
792 |
tokenizer.setRetokenize(true) |
|
641 |
tokenizer.setRetokenize(false) |
|
642 |
tokenizer.setNote("note") |
|
643 |
//tokenizer.setOutSideTextTags("outsideToEdit") |
|
644 |
tokenizer.setOutSideTextTagsAndKeepContent("outsideToEdit") |
|
793 | 645 |
//tokenizer.setDEBUG false |
794 | 646 |
tokenizer.process(); |
795 | 647 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/annotation/InjectWordPropTableMacro.groovy (revision 3055) | ||
---|---|---|
81 | 81 |
|
82 | 82 |
println "Injecting..." |
83 | 83 |
//for (String textid : injections.keySet()) { |
84 |
for (File currentXMLFile : inputDirectory.listFiles()) { |
|
84 |
for (File currentXMLFile : inputDirectory.listFiles().sort{ it.name }) {
|
|
85 | 85 |
if (currentXMLFile.isDirectory()) continue; |
86 | 86 |
if (currentXMLFile.isHidden()) continue; |
87 | 87 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/GetXPathMacro.groovy (revision 3055) | ||
---|---|---|
53 | 53 |
|
54 | 54 |
if (srcDirectory!=null && srcDirectory.exists()) { |
55 | 55 |
|
56 |
def files = srcDirectory.listFiles() |
|
56 |
def files = srcDirectory.listFiles().sort{ it.name }
|
|
57 | 57 |
if (files == null || files.size() == 0) { |
58 | 58 |
println "No files in $srcDirectory" |
59 | 59 |
return; |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/XSL2CQPMacro.groovy (revision 3055) | ||
---|---|---|
42 | 42 |
return false; |
43 | 43 |
} |
44 | 44 |
|
45 |
def xmlFiles = txmDir.listFiles(); |
|
45 |
def xmlFiles = txmDir.listFiles().sort{ it.name };
|
|
46 | 46 |
if (xmlFiles == null || xmlFiles.size() == 0) { |
47 | 47 |
println "Error: no file found in $txmDir" |
48 | 48 |
return false; |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/xml/ApplyXQueryMacro.groovy (revision 3055) | ||
---|---|---|
53 | 53 |
return false; |
54 | 54 |
} |
55 | 55 |
|
56 |
def xmlFiles = txmDir.listFiles(); |
|
56 |
def xmlFiles = txmDir.listFiles().sort{ it.name };
|
|
57 | 57 |
if (xmlFiles == null || xmlFiles.size() == 0) { |
58 | 58 |
println "Error: no file found in $txmDir" |
59 | 59 |
return false; |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/ApplyXQueryMacro.groovy (revision 3055) | ||
---|---|---|
53 | 53 |
return false; |
54 | 54 |
} |
55 | 55 |
|
56 |
def xmlFiles = txmDir.listFiles(); |
|
56 |
def xmlFiles = txmDir.listFiles().sort{ it.name };
|
|
57 | 57 |
if (xmlFiles == null || xmlFiles.size() == 0) { |
58 | 58 |
println "Error: no file found in $txmDir" |
59 | 59 |
return false; |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/file/DirectoryInfoMacro.groovy (revision 3055) | ||
---|---|---|
29 | 29 |
println " write? "+directory.canWrite() |
30 | 30 |
println " executable? "+directory.canExecute() |
31 | 31 |
println " hidden? "+directory.isHidden() |
32 |
def files = directory.listFiles() |
|
32 |
def files = directory.listFiles().sort{ it.name }
|
|
33 | 33 |
println " number of files? "+files.size() |
34 | 34 |
|
35 | 35 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/FindMultiLineRegExpMacro.groovy (revision 3055) | ||
---|---|---|
66 | 66 |
|
67 | 67 |
println "-- looking for: $regexp" |
68 | 68 |
|
69 |
for (File inputfile : inputDirectory.listFiles()) { |
|
69 |
for (File inputfile : inputDirectory.listFiles().sort{ it.name }) {
|
|
70 | 70 |
|
71 | 71 |
if (inputfile.isDirectory()) continue // ignore |
72 | 72 |
|
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/nlp/TT2XMLInDirectoryMacro.groovy (revision 3055) | ||
---|---|---|
26 | 26 |
|
27 | 27 |
XMLOutputFactory factory = XMLOutputFactory.newInstance() |
28 | 28 |
|
29 |
def files = inputDirectory.listFiles() |
|
29 |
def files = inputDirectory.listFiles().sort{ it.name }
|
|
30 | 30 |
if (files == null || files.length == 0) { |
31 | 31 |
println "Error: no file to process in $inputDirectory" |
32 | 32 |
return false; |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/antract/FixINATRSMacro.groovy (revision 3055) | ||
---|---|---|
14 | 14 |
if (!ParametersDialog.open(this)) return; |
15 | 15 |
|
16 | 16 |
println "Fixing $trsDirectory" |
17 |
def files = trsDirectory.listFiles() |
|
17 |
def files = trsDirectory.listFiles().sort{ it.name }
|
|
18 | 18 |
if (files == null) { |
19 | 19 |
println "No files found in $trsDirectory" |
20 | 20 |
return 0; |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/imports/TeiHeader2MetadataCSVMacro.groovy (revision 3055) | ||
---|---|---|
39 | 39 |
return |
40 | 40 |
} |
41 | 41 |
|
42 |
List<File> files = inputDirectory.listFiles() |
|
42 |
List<File> files = inputDirectory.listFiles().sort{ it.name }
|
|
43 | 43 |
if (files == null || files.size() == 0) { |
44 | 44 |
println "** TeiHeader2MetadataCSV: no files found in '$inputDirectory' directory." |
45 | 45 |
return |
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/xml/CombineAttributesMacro.groovy (revision 3055) | ||
---|---|---|
1 |
// STANDARD DECLARATIONS |
|
2 |
package org.txm.macro.xml |
|
3 |
|
|
4 |
import org.txm.annotation.kr.core.conversion.* |
|
5 |
import javax.xml.stream.XMLStreamReader |
|
6 |
import java.io.File; |
|
7 |
import java.io.IOException; |
|
8 |
import java.util.HashSet; |
|
9 |
import java.util.LinkedHashMap; |
|
10 |
import java.util.regex.Pattern; |
|
11 |
|
|
12 |
@Field @Option(name="xmlFile", usage="fichier XML lu", widget="FileOpen", required=true, def="in.xml") // /home/mdecorde/TEMP/ANTRACT/AF/all.xlsx |
|
13 |
File xmlFile |
|
14 |
|
|
15 |
@Field @Option(name="resultFile", usage="fichier XML resultat", widget="FileSave", required=true, def="out.xml") // /home/mdecorde/TEMP/ANTRACT/AF/all.xlsx |
|
16 |
File resultFile |
|
17 |
|
|
18 |
@Field @Option(name = "elementName", usage = "oldType", widget = "String", required = true, def = "w") |
|
19 |
String elementName |
|
20 |
|
|
21 |
//@Field @Option(name = "oldType", usage = "oldType", widget = "String", required = true, def = "myattr") |
|
22 |
def oldType = null // not needed |
|
23 |
|
|
24 |
@Field @Option(name = "newType", usage = "newType", widget = "String", required = true, def = "myattr") |
|
25 |
String newType |
|
26 |
|
|
27 |
@Field @Option(name = "separator", usage = "newType", widget = "Separator", required = false, def = "Code") |
|
28 |
def separator |
|
29 |
|
|
30 |
@Field @Option(name = "getValueCode", usage = "code of the getValue(XMLStreamReader parser, String localname, String attribute, String value) method", widget = "Text", required = true, def = "\"NEWGROOVE\" + parser.getAttributeValue(null, \"myattr\") ") |
|
31 |
String getValueCode |
|
32 |
|
|
33 |
@Field @Option(name = "additionalImportsCode", usage = "code of the getValue(XMLStreamReader parser, String localname, String attribute, String value) method", widget = "Text", required = true, def = "import java.io.File") |
|
34 |
String additionalImportsCode |
|
35 |
|
|
36 |
@Field @Option(name = "debug", usage = "show debug messages", widget = "Boolean", required = true, def = "false") |
|
37 |
boolean debug |
|
38 |
|
|
39 |
if (!ParametersDialog.open(this)) return; |
|
40 |
|
|
41 |
|
|
42 |
// not needed |
|
43 |
LinkedHashMap<Pattern, String> rules = new LinkedHashMap<>(); |
|
44 |
|
|
45 |
XMLTXMFileRuledConversion converter = new XMLTXMFileRuledConversion(xmlFile, rules, elementName, oldType, newType, XMLTXMFileRuledConversion.ABANDON); |
|
46 |
|
|
47 |
// initialize a converter using getValueCode value to fill the getValue(parser, localname, attribute, value) method |
|
48 |
converter.converter = gse.evaluate("""import org.txm.annotation.kr.core.conversion.*; |
|
49 |
import javax.xml.stream.XMLStreamReader |
|
50 |
$additionalImportsCode |
|
51 |
|
|
52 |
return new XMLTXMFileRuledConversion.Converter() { |
|
53 |
public String getValue(XMLStreamReader parser, String localname, String attribute, String value) { |
|
54 |
$getValueCode |
|
55 |
} |
|
56 |
}; |
|
57 |
"""); |
|
58 |
converter.debug = debug |
|
59 |
converter.process(resultFile) |
tmp/org.txm.groovy.core/META-INF/MANIFEST.MF (revision 3055) | ||
---|---|---|
4 | 4 |
Bundle-Version: 1.0.0.qualifier |
5 | 5 |
Bundle-Name: Macro |
6 | 6 |
Bundle-ClassPath: .,bin/ |
7 |
Require-Bundle: org.txm.core;bundle-version="0.8.0";visibility:=reexpo |
|
8 |
rt,org.txm.annotation.core;visibility:=reexport,org.txm.searchengine. |
|
9 |
cqp.core;visibility:=reexport,org.txm.tokenizer.core;visibility:=reex |
|
10 |
port,org.eclipse.osgi |
|
7 |
Require-Bundle: org.txm.core;bundle-version="0.8.0";visibility:=reexport, |
|
8 |
org.txm.annotation.core;visibility:=reexport, |
|
9 |
org.txm.searchengine.cqp.core;visibility:=reexport, |
|
10 |
org.txm.tokenizer.core;visibility:=reexport, |
|
11 |
org.eclipse.osgi;visibility:=reexport |
|
11 | 12 |
Bundle-ActivationPolicy: lazy |
12 | 13 |
Bundle-ManifestVersion: 2 |
13 | 14 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.8 |
tmp/org.txm.annotation.kr.core/META-INF/MANIFEST.MF (revision 3055) | ||
---|---|---|
1 | 1 |
Manifest-Version: 1.0 |
2 | 2 |
Automatic-Module-Name: org.txm.annotation.kr.core |
3 | 3 |
Bundle-SymbolicName: org.txm.annotation.kr.core;singleton:=true |
4 |
Require-Bundle: org.txm.searchengine.cqp.core;visibility:=reexport,jav
|
|
5 |
ax.persistence;bundle-version="2.1.0";visibility:=reexport,org.eclips
|
|
6 |
e.persistence.jpa;bundle-version="2.6.0";visibility:=reexport,org.ecl
|
|
7 |
ipse.persistence.jpa.jpql;bundle-version="2.6.0";visibility:=reexport
|
|
8 |
,org.txm.annotation.core;visibility:=reexport,org.eclipse.persistence
|
|
9 |
.asm;bundle-version="6.0.0"
|
|
4 |
Require-Bundle: org.txm.searchengine.cqp.core;visibility:=reexport, |
|
5 |
javax.persistence;bundle-version="2.1.0";visibility:=reexport,
|
|
6 |
org.eclipse.persistence.jpa;bundle-version="2.6.0";visibility:=reexport,
|
|
7 |
org.eclipse.persistence.jpa.jpql;bundle-version="2.6.0";visibility:=reexport,
|
|
8 |
org.txm.annotation.core;visibility:=reexport,
|
|
9 |
org.eclipse.persistence.asm;bundle-version="6.0.0";visibility:=reexport
|
|
10 | 10 |
Meta-Persistence: META-INF/persistence.xml |
11 | 11 |
Bundle-ActivationPolicy: lazy |
12 | 12 |
Bundle-ManifestVersion: 2 |
tmp/org.txm.annotation.kr.core/src/org/txm/annotation/kr/core/conversion/XMLTXMFileRuledConversion.java (revision 3055) | ||
---|---|---|
7 | 7 |
import java.util.regex.Pattern; |
8 | 8 |
|
9 | 9 |
import javax.xml.stream.XMLStreamException; |
10 |
import javax.xml.stream.XMLStreamReader; |
|
10 | 11 |
|
11 | 12 |
import org.txm.importer.StaxIdentityParser; |
12 | 13 |
|
13 | 14 |
public class XMLTXMFileRuledConversion extends StaxIdentityParser { |
14 | 15 |
|
15 |
protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<Pattern, String>();
|
|
16 |
public boolean debug = false;
|
|
16 | 17 |
|
18 |
protected LinkedHashMap<Pattern, String> rules = new LinkedHashMap<>(); |
|
19 |
|
|
20 |
protected String elementName = null; |
|
21 |
|
|
22 |
boolean fixingElement = false; // true when the current element needs to be fixed |
|
23 |
|
|
17 | 24 |
protected String oldType; |
18 | 25 |
|
19 | 26 |
protected String newType; |
... | ... | |
24 | 31 |
|
25 | 32 |
public static final String ABANDON = "abandon"; |
26 | 33 |
|
27 |
HashSet<String> noMatchValues = new HashSet<String>();
|
|
34 |
HashSet<String> noMatchValues = new HashSet<>(); |
|
28 | 35 |
|
29 |
public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String oldType, String newType, String mode) throws IOException, XMLStreamException { |
|
36 |
/** |
|
37 |
* |
|
38 |
* @param infile the file to read |
|
39 |
* @param rules the conversion rules |
|
40 |
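* @param elementPath local name of the elements whose attributes are converted; if null, the conversion happens on the form and ana values of the w elements |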
|
41 |
* @param oldType word type or element attribute to read |
|
42 |
* @param newType word type or element attribute to write |
|
43 |
* @param mode XMLTXMFileRuledConversion.DELETE or XMLTXMFileRuledConversion.COPY or XMLTXMFileRuledConversion.ABANDON |
|
44 |
* @throws IOException |
|
45 |
* @throws XMLStreamException |
|
46 |
*/ |
|
47 |
public XMLTXMFileRuledConversion(File infile, LinkedHashMap<Pattern, String> rules, String elementPath, String oldType, String newType, String mode) throws IOException, XMLStreamException { |
|
30 | 48 |
super(infile); |
31 | 49 |
this.rules = rules; |
32 | 50 |
this.oldType = oldType; |
... | ... | |
34 | 52 |
|
35 | 53 |
this.mode = mode; |
36 | 54 |
|
37 |
if (!this.newType.startsWith("#")) this.newType = "#" + this.newType; |
|
38 |
if (!this.oldType.startsWith("#")) this.oldType = "#" + this.oldType; |
|
55 |
if (elementPath != null && elementPath.length() > 0) { |
|
56 |
this.elementName = elementPath; |
|
57 |
} |
|
58 |
|
|
59 |
if (elementPath == null) { // we're fixing txm:words |
|
60 |
if (!this.newType.startsWith("#")) this.newType = "#" + this.newType; |
|
61 |
if (!this.oldType.startsWith("#")) this.oldType = "#" + this.oldType; |
|
62 |
} |
|
39 | 63 |
} |
40 | 64 |
|
41 | 65 |
boolean inW = false, inAna = false, inForm; |
42 | 66 |
|
43 |
LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>();
|
|
67 |
LinkedHashMap<String, String> anaValues = new LinkedHashMap<>(); |
|
44 | 68 |
|
45 |
LinkedHashMap<String, String> anaResps = new LinkedHashMap<String, String>();
|
|
69 |
LinkedHashMap<String, String> anaResps = new LinkedHashMap<>(); |
|
46 | 70 |
|
47 | 71 |
String typeName = null; |
48 | 72 |
|
... | ... | |
52 | 76 |
|
53 | 77 |
private Object mode; |
54 | 78 |
|
79 |
public static class Converter { |
|
80 |
|
|
81 |
public Converter() { |
|
82 |
|
|
83 |
} |
|
84 |
|
|
85 |
public String getValue(XMLStreamReader parser, String localname, String attribute, String value) { |
|
86 |
return value; |
|
87 |
} |
|
88 |
} |
|
89 |
|
|
90 |
private Converter converter = null; |
|
91 |
|
|
55 | 92 |
@Override |
93 |
protected void writeAttributes() throws XMLStreamException { |
|
94 |
if (fixingElement) { |
|
95 |
String newValue = null; |
|
96 |
for (int i = 0; i < parser.getAttributeCount(); i++) { |
|
97 |
String att = parser.getAttributeLocalName(i); |
|
98 |
String value = parser.getAttributeValue(i); |
|
99 |
if (oldType != null && oldType.equals(att)) { |
|
100 |
if (converter != null) { |
|
101 |
if (debug) System.out.println("CALL CONVERTER with attribute $att"); |
|
102 |
newValue = converter.getValue(parser, localname, att, value); |
|
103 |
} |
|
104 |
else { |
|
105 |
newValue = getValueIfMatch(value); |
|
106 |
} |
|
107 |
} |
|
108 |
if (!newType.equals(att)) { // don't write the newType, it will be done at the end |
|
109 |
writeAttribute(parser.getAttributePrefix(i), att, value); |
|
110 |
} |
|
111 |
} |
|
112 |
|
|
113 |
if (oldType == null && converter != null) { |
|
114 |
if (debug) System.out.println("CALL CONVERTER with no attribute set"); |
|
115 |
newValue = converter.getValue(parser, localname, null, null); |
|
116 |
} |
|
117 |
|
|
118 |
if (newValue != null) { |
|
119 |
if (debug) System.out.println("WRITE NEWTYPE: " + newType + "=" + newValue); |
|
120 |
writeAttribute(null, newType, newValue); |
|
121 |
} |
|
122 |
} |
|
123 |
else { |
|
124 |
super.writeAttributes(); |
|
125 |
} |
|
126 |
} |
|
127 |
|
|
128 |
@Override |
|
56 | 129 |
public void processStartElement() throws XMLStreamException, IOException { |
57 |
if (!inW) super.processStartElement(); // don't write W content |
|
58 | 130 |
|
59 |
if (localname.equals("w")) { |
|
60 |
inW = true; |
|
61 |
anaValues.clear(); |
|
62 |
anaResps.clear(); |
|
131 |
|
|
132 |
if (elementName != null) { |
|
133 |
if (this.localname.equals(elementName)) { |
|
134 |
fixingElement = true; |
|
135 |
} |
|
63 | 136 |
|
64 |
// initialize the new type to a empty value in case there is transformation rule |
|
65 |
anaValues.put(newType, ""); |
|
66 |
anaResps.put(newType, "#txm_recode"); |
|
137 |
super.processStartElement(); |
|
138 |
fixingElement = false; |
|
67 | 139 |
} |
68 |
else if (localname.equals("ana")) { |
|
69 |
inAna = true; |
|
70 |
typeName = parser.getAttributeValue(null, "type"); |
|
71 |
respName = parser.getAttributeValue(null, "resp"); |
|
72 |
anaResps.put(typeName, respName); |
|
73 |
// if (typeName != null) typeName = typeName.substring(1); // remove # |
|
74 |
typeValue = ""; |
|
140 |
else { |
|
141 |
|
|
142 |
if (!inW) super.processStartElement(); // don't write W content |
|
143 |
|
|
144 |
if (localname.equals("w")) { |
|
145 |
inW = true; |
|
146 |
anaValues.clear(); |
|
147 |
anaResps.clear(); |
|
148 |
|
|
149 |
// initialize the new type to a empty value in case there is transformation rule |
|
150 |
anaValues.put(newType, ""); |
|
151 |
anaResps.put(newType, "#txm_recode"); |
|
152 |
} |
|
153 |
else if (localname.equals("ana")) { |
|
154 |
inAna = true; |
|
155 |
typeName = parser.getAttributeValue(null, "type"); |
|
156 |
respName = parser.getAttributeValue(null, "resp"); |
|
157 |
anaResps.put(typeName, respName); |
|
158 |
// if (typeName != null) typeName = typeName.substring(1); // remove # |
|
159 |
typeValue = ""; |
|
160 |
} |
|
161 |
else if (localname.equals("form")) { |
|
162 |
inForm = true; |
|
163 |
formValue = ""; |
|
164 |
} |
|
75 | 165 |
} |
76 |
else if (localname.equals("form")) { |
|
77 |
inForm = true; |
|
78 |
formValue = ""; |
|
79 |
} |
|
80 | 166 |
} |
81 | 167 |
|
82 | 168 |
@Override |
83 | 169 |
public void processCharacters() throws XMLStreamException { |
84 |
if (inW && inAna) typeValue += parser.getText(); |
|
85 |
else if (inW && inForm) formValue += parser.getText(); |
|
86 |
else super.processCharacters(); |
|
170 |
if (elementName != null) { |
|
171 |
super.processCharacters(); |
|
172 |
} |
|
173 |
else { |
|
174 |
if (inW && inAna) typeValue += parser.getText(); |
|
175 |
else if (inW && inForm) formValue += parser.getText(); |
|
176 |
else super.processCharacters(); |
|
177 |
} |
|
87 | 178 |
} |
88 | 179 |
|
89 | 180 |
@Override |
90 | 181 |
public void processEndElement() throws XMLStreamException { |
91 |
if (localname.equals("w")) { |
|
92 |
inW = false; |
|
182 |
|
|
183 |
if (elementName != null) { |
|
184 |
super.processEndElement(); |
|
185 |
} |
|
186 |
else { |
|
93 | 187 |
|
94 |
// write W content |
|
95 |
try { |
|
96 |
// get the value to test |
|
97 |
String value = null; |
|
98 |
if (oldType.equals("word")) { |
|
99 |
value = formValue; |
|
100 |
} |
|
101 |
else { |
|
102 |
value = anaValues.get(oldType); |
|
103 |
} |
|
188 |
if (localname.equals("w")) { |
|
189 |
inW = false; |
|
104 | 190 |
|
105 |
if (newType.equals("word")) { // update form property
|
|
106 |
updateFormValueIfMatch(value);
|
|
107 |
}
|
|
108 |
else { // update another word property
|
|
109 |
if (value != null) {
|
|
110 |
updateAnaValuesIfMatch(value);
|
|
191 |
// write W content
|
|
192 |
try {
|
|
193 |
// get the value to test
|
|
194 |
String value = null;
|
|
195 |
if (oldType.equals("word")) {
|
|
196 |
value = formValue;
|
|
111 | 197 |
} |
112 |
} |
|
113 |
|
|
114 |
// write the word element |
|
115 |
writer.writeStartElement("txm:form"); |
|
116 |
writer.writeCharacters(formValue); |
|
117 |
writer.writeEndElement(); |
|
118 |
|
|
119 |
for (String k : anaValues.keySet()) { |
|
120 |
String resp = anaResps.get(k); |
|
121 |
if (resp == null) resp = "#txm_recode"; |
|
198 |
else { |
|
199 |
value = anaValues.get(oldType); |
|
200 |
} |
|
122 | 201 |
|
123 |
writer.writeStartElement("txm:ana"); |
|
124 |
writer.writeAttribute("resp", resp); |
|
125 |
writer.writeAttribute("type", k); |
|
126 |
writer.writeCharacters(anaValues.get(k)); |
|
202 |
if (newType.equals("word")) { // update form property |
|
203 |
updateFormValueIfMatch(value); |
|
204 |
} |
|
205 |
else { // update an ana property |
|
206 |
if (value != null) { |
|
207 |
updateAnaValuesIfMatch(value); |
|
208 |
} |
|
209 |
} |
|
210 |
|
|
211 |
// write the word element |
|
212 |
writer.writeStartElement("txm:form"); |
|
213 |
writer.writeCharacters(formValue); |
|
127 | 214 |
writer.writeEndElement(); |
215 |
|
|
216 |
for (String k : anaValues.keySet()) { |
Formats disponibles : Unified diff