Révision 986
tmp/org.txm.partition.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.searchengine.cqp.core/src/org/txm/searchengine/cqp/corpus/QueryResult.java (revision 986) | ||
---|---|---|
41 | 41 |
import org.txm.searchengine.cqp.corpus.query.Query; |
42 | 42 |
import org.txm.searchengine.cqp.serverException.CqiServerError; |
43 | 43 |
|
44 |
// TODO: Auto-generated Javadoc |
|
45 | 44 |
/** |
46 | 45 |
* The result of a CQP query on a corpus. |
47 | 46 |
* |
... | ... | |
233 | 232 |
List<Match> res = new ArrayList<Match>(); |
234 | 233 |
try { |
235 | 234 |
starts = getStarts(from, to); |
236 |
ends = CorpusManager.getCorpusManager().getCqiClient() |
|
237 |
.dumpSubCorpus(getQualifiedCqpId(), |
|
238 |
NetCqiClient.CQI_CONST_FIELD_MATCHEND, from, to); |
|
239 |
targets = CorpusManager.getCorpusManager().getCqiClient() |
|
240 |
.dumpSubCorpus(getQualifiedCqpId(), |
|
241 |
NetCqiClient.CQI_CONST_FIELD_TARGET, from, to); |
|
235 |
ends = getEnds(from, to); |
|
236 |
targets = getTargets(from ,to); |
|
242 | 237 |
} catch (Exception e) { |
243 | 238 |
throw new CqiClientException(e); |
244 | 239 |
} |
... | ... | |
248 | 243 |
return res; |
249 | 244 |
} |
250 | 245 |
|
246 |
private int[] getTargets(int from, int to) throws UnexpectedAnswerException, IOException, CqiServerError { |
|
247 |
return CorpusManager.getCorpusManager().getCqiClient() |
|
248 |
.dumpSubCorpus(getQualifiedCqpId(), |
|
249 |
NetCqiClient.CQI_CONST_FIELD_TARGET, from, to); |
|
250 |
} |
|
251 |
|
|
251 | 252 |
/* (non-Javadoc) |
252 | 253 |
* @see org.txm.searchengine.cqp.corpus.CqpObject#getName() |
253 | 254 |
*/ |
tmp/org.txm.searchengine.cqp.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:16 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,res/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:16 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,res/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.statsengine.r.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:18 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:18 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.annotation.kr.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:03 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:03 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.statsengine.r.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:17 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,plugin.xml,.,R/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:17 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,plugin.xml,.,R/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.lexicaltable.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:11 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:11 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/editors/TXMEditor.java (revision 986) | ||
---|---|---|
987 | 987 |
value = ((NewNavigationWidget)object).getCurrentPosition(); |
988 | 988 |
} |
989 | 989 |
else if(object instanceof AssistedQueryWidget) { |
990 |
value = ((AssistedQueryWidget)object).getQuery();
|
|
990 |
value = ((AssistedQueryWidget)object).getRawString(); // save the raw query without fixes
|
|
991 | 991 |
} |
992 | 992 |
else if(object instanceof QueryWidget) { |
993 | 993 |
value = ((QueryWidget)object).getQuery(); |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/views/fileexplorer/MacroExplorer.java (revision 986) | ||
---|---|---|
98 | 98 |
private static void initCurrentDirectory() { |
99 | 99 |
home = Toolbox.getTxmHomePath(); |
100 | 100 |
if (home == null) home = System.getProperty("user.home"); //$NON-NLS-1$ |
101 |
File scriptsDir = new File(home, "scripts"); //$NON-NLS-1$ |
|
102 |
File currentRootDir = new File(scriptsDir, "macro/org/txm/macro"); //$NON-NLS-1$
|
|
101 |
File scriptsDir = new File(home, "scripts/groovy/user/"); //$NON-NLS-1$
|
|
102 |
File currentRootDir = new File(scriptsDir, "org/txm/macro"); //$NON-NLS-1$ |
|
103 | 103 |
home = currentRootDir.getAbsolutePath(); |
104 | 104 |
} |
105 | 105 |
|
... | ... | |
163 | 163 |
newMacro.addSelectionListener(new SelectionAdapter() { |
164 | 164 |
@Override |
165 | 165 |
public void widgetSelected(SelectionEvent e) { |
166 |
String scriptRootDir = Toolbox.getTxmHomePath() + "/scripts"; //$NON-NLS-1$ |
|
167 |
File currentRootDir = new File(scriptRootDir, "macro/org/txm/macro"); //$NON-NLS-1$
|
|
166 |
String scriptRootDir = Toolbox.getTxmHomePath() + "/scripts/groovy/user"; //$NON-NLS-1$
|
|
167 |
File currentRootDir = new File(scriptRootDir, "org/txm/macro"); //$NON-NLS-1$ |
|
168 | 168 |
currentRootDir.mkdirs(); |
169 | 169 |
|
170 | 170 |
InputDialog dialog = new InputDialog(e.display.getActiveShell(), TXMUIMessages.CreateMacro_0, TXMUIMessages.CreateMacro_1, "", null); //$NON-NLS-1$ |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/handlers/scripts/ExecuteImportScript.java (revision 986) | ||
---|---|---|
102 | 102 |
String filename = null, filepath = null; |
103 | 103 |
if (event.getParameters().containsKey("org.txm.rcp.commands.commandParameter3")) { |
104 | 104 |
filename = event.getParameter("org.txm.rcp.commands.commandParameter3"); //$NON-NLS-1$ |
105 |
filepath = "scripts/import/" + filename; //$NON-NLS-1$
|
|
105 |
filepath = "scripts/user/org/txm/scripts/importer" + filename; //$NON-NLS-1$
|
|
106 | 106 |
} else { |
107 | 107 |
Shell shell = HandlerUtil.getActiveWorkbenchWindowChecked(event).getShell(); |
108 | 108 |
FileDialog dialog = new FileDialog(shell, SWT.SAVE); |
109 |
dialog.setFilterPath(Toolbox.getTxmHomePath()+"/scripts/import");
|
|
109 |
dialog.setFilterPath(Toolbox.getTxmHomePath()+"/scripts/user/org/txm/scripts/importer");
|
|
110 | 110 |
filepath = dialog.open(); |
111 | 111 |
} |
112 | 112 |
|
... | ... | |
333 | 333 |
// retrieve the script relative path "quickLoader" -> "quick" |
334 | 334 |
String scriptPackage = script.getName(); |
335 | 335 |
int idx = scriptPackage.indexOf("Loader"); |
336 |
scriptPackage = "org/txm/importer/" + scriptPackage.substring(0, idx); |
|
336 |
scriptPackage = "org/txm/scripts/importer/" + scriptPackage.substring(0, idx);
|
|
337 | 337 |
|
338 | 338 |
return BundleUtils.replaceFilesIfNewer("org.txm.groovy.core", "src/groovy", scriptPackage, |
339 | 339 |
script.getName(), script); |
tmp/org.txm.rcp/src/main/java/org/txm/rcp/utils/GSERunner.java (revision 986) | ||
---|---|---|
1 | 1 |
package org.txm.rcp.utils; |
2 | 2 |
|
3 |
import groovy.lang.Binding; |
|
4 |
import groovy.util.GroovyScriptEngine; |
|
5 |
import groovy.util.ResourceException; |
|
6 |
import groovy.util.ScriptException; |
|
7 |
|
|
8 | 3 |
import java.io.File; |
9 | 4 |
import java.io.FilenameFilter; |
10 | 5 |
import java.io.IOException; |
... | ... | |
21 | 16 |
import org.osgi.framework.BundleContext; |
22 | 17 |
import org.osgi.framework.wiring.BundleWiring; |
23 | 18 |
import org.txm.Toolbox; |
24 |
import org.txm.core.preferences.TBXPreferences; |
|
25 | 19 |
import org.txm.rcp.TXMClassLoader; |
26 | 20 |
import org.txm.utils.logger.Log; |
27 | 21 |
|
28 | 22 |
import cern.colt.Arrays; |
23 |
import groovy.lang.Binding; |
|
24 |
import groovy.util.GroovyScriptEngine; |
|
25 |
import groovy.util.ResourceException; |
|
26 |
import groovy.util.ScriptException; |
|
29 | 27 |
|
30 | 28 |
/** |
31 | 29 |
* gse wrapper for TXM scripts and macros |
... | ... | |
55 | 53 |
* @return |
56 | 54 |
*/ |
57 | 55 |
public static GSERunner buildDefaultGSE(File script) { |
58 |
String scriptRootDir = Toolbox.getPreference(TBXPreferences.USER_TXM_HOME)+"/scripts";
|
|
56 |
String scriptRootDir = new File(Toolbox.getTxmHomePath(), "scripts/groovy/").getAbsolutePath();
|
|
59 | 57 |
if (defaultScriptRootDir.equals(scriptRootDir) && defaultGSE != null) { |
60 | 58 |
try { |
61 | 59 |
defaultGSE.getGroovyClassLoader().addURL(new URL("file://"+script.getParent())); |
... | ... | |
67 | 65 |
} |
68 | 66 |
|
69 | 67 |
if (scriptRootDir == null) { |
70 |
scriptRootDir = new File(Toolbox.getTxmHomePath(), "scripts").getAbsolutePath(); |
|
68 |
scriptRootDir = new File(Toolbox.getTxmHomePath(), "scripts/groovy/").getAbsolutePath();
|
|
71 | 69 |
} |
72 | 70 |
String[] roots = new String[] { |
71 |
scriptRootDir+"/system/", //$NON-NLS-1$ |
|
73 | 72 |
scriptRootDir+"/user/", //$NON-NLS-1$ |
74 |
scriptRootDir+"/macro/", //$NON-NLS-1$ |
|
75 |
scriptRootDir+"/import/", //$NON-NLS-1$ |
|
76 |
script.getParent(), |
|
77 | 73 |
}; |
78 | 74 |
|
79 | 75 |
try { |
tmp/org.txm.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:15 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,splash.bmp,lib/,rcp.product,plugin_customization.ini,lib/commons-cli-1.2.jar,lib/commons-lang-2.4.jar,lib/junit-4.5.jar,lib/log4j-1.2.12.jar,installtreetagger_en.html,css/,p2.inf,bin/,OSGI-INF/,TXM.sh,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/,src/main/java/ |
|
5 |
bin.excludes=lib/batik/ |
|
1 |
#Fri Jul 06 10:25:15 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,splash.bmp,lib/,rcp.product,plugin_customization.ini,lib/commons-cli-1.2.jar,lib/commons-lang-2.4.jar,lib/junit-4.5.jar,lib/log4j-1.2.12.jar,installtreetagger_en.html,css/,p2.inf,bin/,OSGI-INF/,TXM.sh,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/,src/main/java/ |
|
5 |
bin.excludes=lib/batik/ |
|
6 |
qualifier=svn |
tmp/org.txm.backtomedia.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:04 CEST 2018 |
|
2 |
bin.includes=plugin.xml,META-INF/,OSGI-INF/,.,icons/,vlcj-2.4.1/jna-3.5.2.jar,vlcj-2.4.1/platform-3.5.2.jar,vlcj-2.4.1/vlcj-2.4.1.jar,opal-0.9.5.2.jar,OSGI-INF/l10n/bundle.properties |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:04 CEST 2018 |
|
2 |
bin.includes=plugin.xml,META-INF/,OSGI-INF/,.,icons/,vlcj-2.4.1/jna-3.5.2.jar,vlcj-2.4.1/platform-3.5.2.jar,vlcj-2.4.1/vlcj-2.4.1.jar,opal-0.9.5.2.jar,OSGI-INF/l10n/bundle.properties |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.backtomedia.rcp/META-INF/MANIFEST.MF (revision 986) | ||
---|---|---|
107 | 107 |
Bundle-Version: 1.0.0.qualifier |
108 | 108 |
Bundle-Name: %Bundle-Name |
109 | 109 |
Bundle-ManifestVersion: 2 |
110 |
Bundle-SymbolicName: org.txm.backtomedia;singleton:=true |
|
110 |
Bundle-SymbolicName: org.txm.backtomedia.rcp;singleton:=true
|
|
111 | 111 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.7 |
112 | 112 |
|
tmp/org.txm.wordcloud.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:20 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:20 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.cooccurrence.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:08 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/ |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:08 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/ |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.chartsengine.svgbatik.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:07 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:07 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.annotation.kr.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:03 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:03 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.searchengine.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:16 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:16 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.tigersearch.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:19 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,lib/,lib/dom4j-1.6.1.jar,lib/log4j-1.2.12.jar,lib/TigerSearch.jar,groovy/org/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:19 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,lib/,lib/dom4j-1.6.1.jar,lib/log4j-1.2.12.jar,lib/TigerSearch.jar,groovy/org/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.cql2lsa.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:09 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:09 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.textsbalance.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:18 CEST 2018 |
|
2 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:18 CEST 2018 |
|
2 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.annotation.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:02 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,libs/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:02 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,libs/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.progression.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:13 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:13 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.querycooccurrences.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:15 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:15 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.querycooccurrences.rcp/META-INF/MANIFEST.MF (revision 986) | ||
---|---|---|
15 | 15 |
Bundle-Vendor: Textometrie.org |
16 | 16 |
Bundle-ActivationPolicy: lazy |
17 | 17 |
Bundle-Version: 1.0.0.qualifier |
18 |
Bundle-Name: org.txm.querycooccurrence.rcp |
|
18 |
Bundle-Name: org.txm.querycooccurrences.rcp
|
|
19 | 19 |
Bundle-ManifestVersion: 2 |
20 | 20 |
Bundle-Activator: listcooccurrences.Activator |
21 |
Bundle-SymbolicName: org.txm.querycooccurrence.rcp;singleton:=true |
|
21 |
Bundle-SymbolicName: org.txm.querycooccurrences.rcp;singleton:=true
|
|
22 | 22 |
Bundle-RequiredExecutionEnvironment: JavaSE-1.7 |
23 | 23 |
|
tmp/org.txm.referencer.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:15 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:15 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.treetagger.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:19 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:19 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.wordcloud.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:20 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:20 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.partition.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:13 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:13 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.internalview.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:11 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:11 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.index.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:10 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:10 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.links.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.textsbalance.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:18 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:18 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.statsengine.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:17 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:17 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.lexicaltable.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:11 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:11 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.annotation.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:03 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:03 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.progression.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:14 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/ |
|
3 |
output..=bin/ |
|
4 |
forceContextQualifier=737 |
|
5 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:14 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/ |
|
3 |
output..=bin/ |
|
4 |
forceContextQualifier=737 |
|
5 |
source..=src/ |
|
6 |
qualifier=svn |
tmp/org.txm.chartsengine.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:05 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:05 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.concordance.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:08 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/ |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:08 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/ |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.analec.rcp/META-INF/MANIFEST.MF (revision 986) | ||
---|---|---|
10 | 10 |
org.txm.concordance.rcp;bundle-version="1.0.0";visibility:=reexport, |
11 | 11 |
org.txm.progression.rcp;bundle-version="1.0.0";visibility:=reexport, |
12 | 12 |
org.txm.lexicaltable.rcp;bundle-version="1.0.0";visibility:=reexport, |
13 |
org.txm.edition.rcp;bundle-version="1.0.0", |
|
14 |
org.txm.annotation.rcp |
|
13 |
org.txm.edition.rcp;bundle-version="1.0.0";visibility:=reexport,
|
|
14 |
org.txm.annotation.rcp;visibility:=reexport
|
|
15 | 15 |
Export-Package: JamaPlus, |
16 | 16 |
JamaPlus.util, |
17 | 17 |
analecrcp, |
tmp/org.txm.analec.rcp/src/org/txm/analec/export/AnalecAnnotationTEIExporter.java (revision 986) | ||
---|---|---|
16 | 16 |
import org.eclipse.core.runtime.IProgressMonitor; |
17 | 17 |
import org.txm.Toolbox; |
18 | 18 |
import org.txm.importer.StaxIdentityParser; |
19 |
import org.txm.importer.StaxStackWriter; |
|
19 |
import org.txm.importer.scripts.StaxStackWriter;
|
|
20 | 20 |
import org.txm.importer.PersonalNamespaceContext; |
21 | 21 |
import org.txm.objects.BaseParameters; |
22 | 22 |
import org.txm.rcp.Application; |
tmp/org.txm.chartsengine.jfreechart.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:06 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:06 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.para.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.ca.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:05 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:05 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.chartsengine.raster.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:07 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:07 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.chartsengine.r.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:07 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:07 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.para.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:12 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.ca.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:05 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/ |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:05 CEST 2018 |
|
2 |
bin.includes=META-INF/,.,plugin.xml,icons/,OSGI-INF/ |
|
3 |
output..=bin/ |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.groovy.rcp/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:10 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,contexts.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:10 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=plugin.xml,META-INF/,.,icons/,contexts.xml,OSGI-INF/l10n/bundle.properties |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.specificities.core/build.properties (revision 986) | ||
---|---|---|
1 |
#Fri Jul 06 10:25:17 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
1 |
#Fri Jul 06 10:25:17 CEST 2018 |
|
2 |
output..=bin/ |
|
3 |
bin.includes=META-INF/,.,plugin.xml |
|
4 |
source..=src/ |
|
5 |
qualifier=svn |
tmp/org.txm.core/src/java/org/txm/importer/WExtract.groovy (revision 986) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2015-12-17 12:11:39 +0100 (jeu. 17 déc. 2015) $ |
|
25 |
// $LastChangedRevision: 3087 $ |
|
26 |
// $LastChangedBy: mdecorde $ |
|
27 |
// |
|
28 |
package org.txm.importer |
|
29 |
|
|
30 |
import javax.xml.parsers.DocumentBuilder; |
|
31 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
32 |
import javax.xml.parsers.ParserConfigurationException; |
|
33 |
import javax.xml.transform.OutputKeys; |
|
34 |
import javax.xml.transform.Result; |
|
35 |
import javax.xml.transform.Source; |
|
36 |
import javax.xml.transform.Transformer; |
|
37 |
import javax.xml.transform.TransformerFactory; |
|
38 |
import javax.xml.transform.dom.DOMSource; |
|
39 |
import javax.xml.transform.stream.StreamResult; |
|
40 |
|
|
41 |
import org.w3c.dom.Document; |
|
42 |
import org.w3c.dom.Element; |
|
43 |
import org.w3c.dom.NodeList; |
|
44 |
import org.xml.sax.SAXException; |
|
45 |
|
|
46 |
import javax.xml.stream.*; |
|
47 |
import java.io.File; |
|
48 |
import java.net.URL; |
|
49 |
|
|
50 |
// TODO: Auto-generated Javadoc |
|
51 |
/** |
|
52 |
* Extract w tags from a tei file |
|
53 |
* not finished. |
|
54 |
* |
|
55 |
* @author mdecorde |
|
56 |
*/ |
|
57 |
class WExtract |
|
58 |
{ |
|
59 |
|
|
60 |
/** |
|
61 |
* Process. |
|
62 |
* |
|
63 |
* @param infile the infile |
|
64 |
* @param outfile the outfile |
|
65 |
* @param max the max |
|
66 |
* @return the java.lang. object |
|
67 |
*/ |
|
68 |
public process(File infile, File outfile, int max) |
|
69 |
{ |
|
70 |
println "Process "+infile.getName()+", keep $max words" |
|
71 |
int count = this.countW(infile); |
|
72 |
if(count < max) |
|
73 |
{ |
|
74 |
println "can't extract $max words, the file "+infile.getName()+" contains only $count words" |
|
75 |
return; |
|
76 |
} |
|
77 |
String ms = "#ms_K" |
|
78 |
int tier = max/3; |
|
79 |
int from1 = 0 |
|
80 |
int to1 = tier; |
|
81 |
int from2 = (count/2) - (tier/2); |
|
82 |
int to2 =(count/2) + (tier/2); |
|
83 |
int from3 = count -tier; |
|
84 |
int to3= count-1; |
|
85 |
boolean isSic = false; |
|
86 |
boolean isW = false; |
|
87 |
boolean isText = false; |
|
88 |
boolean printW = true; |
|
89 |
int wcount=0; |
|
90 |
|
|
91 |
println " count : "+count |
|
92 |
println " get from "+from1+" to "+to1 |
|
93 |
println " get from "+from2+" to "+to2 |
|
94 |
println " get from "+from3+" to "+to3 |
|
95 |
|
|
96 |
|
|
97 |
String localname; |
|
98 |
String prefix; |
|
99 |
InputStream inputData = infile.toURI().toURL().openStream(); |
|
100 |
XMLInputFactory inputfactory = XMLInputFactory.newInstance(); |
|
101 |
XMLStreamReader parser = inputfactory.createXMLStreamReader(inputData); |
|
102 |
XMLOutputFactory factory = XMLOutputFactory.newInstance(); |
|
103 |
|
|
104 |
FileOutputStream output = new FileOutputStream(outfile); |
|
105 |
XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8"); |
|
106 |
|
|
107 |
writer.writeStartDocument("utf-8", "1.0"); |
|
108 |
|
|
109 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) |
|
110 |
{ |
|
111 |
if(isText) |
|
112 |
{ |
|
113 |
if((wcount >= from1 && wcount <= to1 )|| |
|
114 |
(wcount >= from2 && wcount <= to2) || |
|
115 |
(wcount >= from3 && wcount <= to3)) |
|
116 |
printW = true; |
|
117 |
else |
|
118 |
printW = false; |
|
119 |
} |
|
120 |
else |
|
121 |
printW = true; |
|
122 |
|
|
123 |
switch (event) |
|
124 |
{ |
|
125 |
case XMLStreamConstants.START_ELEMENT: |
|
126 |
localname = parser.getLocalName(); |
|
127 |
prefix = parser.getPrefix(); |
|
128 |
|
|
129 |
/* |
|
130 |
if(localname == "supplied") |
|
131 |
if(parser.getAttributeValue(null,"source") != null) |
|
132 |
ms = parser.getAttributeValue(null,"source") |
|
133 |
if(localname == "sic") |
|
134 |
{ |
|
135 |
isSic= true; |
|
136 |
} |
|
137 |
*/ |
|
138 |
if(localname == "text") |
|
139 |
isText = true; |
|
140 |
|
|
141 |
if(localname == "w") |
|
142 |
{ |
|
143 |
isW= true; |
|
144 |
wcount++; |
|
145 |
|
|
146 |
if(isText) |
|
147 |
{ |
|
148 |
if((wcount >= from1 && wcount <= to1 )|| |
|
149 |
(wcount >= from2 && wcount <= to2) || |
|
150 |
(wcount >= from3 && wcount <= to3)) |
|
151 |
printW = true; |
|
152 |
else |
|
153 |
printW = false; |
|
154 |
} |
|
155 |
else |
|
156 |
printW = true; |
|
157 |
} |
|
158 |
|
|
159 |
/*if(!isSic) |
|
160 |
if(localname != "choice" && localname != "corr" && localname != "sic" && localname != "supplied" && localname != "seg") |
|
161 |
{*/ |
|
162 |
if(localname == "w") |
|
163 |
{ |
|
164 |
if(printW) |
|
165 |
{ |
|
166 |
if(prefix != null && prefix.length() > 0) |
|
167 |
writer.writeStartElement(prefix+":"+localname); |
|
168 |
else |
|
169 |
writer.writeStartElement(localname); |
|
170 |
|
|
171 |
for(int i= 0 ; i < parser.getAttributeCount() ;i++ ) |
|
172 |
{ |
|
173 |
if(parser.getAttributePrefix(i)!= "") |
|
174 |
writer.writeAttribute(parser.getAttributePrefix(i)+":"+parser.getAttributeLocalName(i), parser.getAttributeValue(i)); |
|
175 |
else |
|
176 |
writer.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i)); |
|
177 |
} |
|
178 |
//writer.writeAttribute("srcmf:src", ms); |
|
179 |
} |
|
180 |
} |
|
181 |
else |
|
182 |
{ |
|
183 |
if(prefix != null && prefix.length() > 0) |
|
184 |
writer.writeStartElement(prefix+":"+localname); |
|
185 |
else |
|
186 |
writer.writeStartElement(localname); |
|
187 |
|
|
188 |
if(localname == "teiHeader") |
|
189 |
{ |
|
190 |
writer.writeAttribute("xmlns:me", "http://www.menota.org/ns/1.0"); |
|
191 |
writer.writeAttribute("xmlns:bfm", "http://bfm.ens-lsh.fr/ns/1.0"); |
|
192 |
//writer.writeAttribute("xmlns:srcmf", "https://listes.cru.fr/wiki/srcmf/index"); |
|
193 |
} |
|
194 |
|
|
195 |
if(localname == "TEI") |
|
196 |
{ |
|
197 |
writer.writeAttribute("xmlns","http://www.tei-c.org/ns/1.0"); |
|
198 |
} |
|
199 |
|
|
200 |
for(int i= 0 ; i < parser.getAttributeCount() ;i++ ) |
|
201 |
{ |
|
202 |
if(parser.getAttributePrefix(i)!= "") |
|
203 |
writer.writeAttribute(parser.getAttributePrefix(i)+":"+parser.getAttributeLocalName(i), parser.getAttributeValue(i)); |
|
204 |
else |
|
205 |
writer.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i)); |
|
206 |
} |
|
207 |
} |
|
208 |
//} |
|
209 |
break; |
|
210 |
|
|
211 |
case XMLStreamConstants.END_ELEMENT: |
|
212 |
localname =parser.getLocalName() |
|
213 |
|
|
214 |
/*if(localname == "sic") |
|
215 |
isSic= false; |
|
216 |
if(localname == "w") |
|
217 |
isW= false; |
|
218 |
if(localname == "supplied" && ms != "#ms_K") |
|
219 |
ms = "#ms_K"; |
|
220 |
if(!isSic) |
|
221 |
if(localname != "choice" && localname != "corr" && localname != "sic" && localname != "supplied" && localname != "seg") |
|
222 |
{*/ |
|
223 |
if(localname == "w") |
|
224 |
{ |
|
225 |
if(printW) |
|
226 |
{ |
|
227 |
writer.writeEndElement(); |
|
228 |
writer.writeCharacters("\n"); |
|
229 |
} |
|
230 |
} |
|
231 |
else |
|
232 |
{ |
|
233 |
writer.writeEndElement(); |
|
234 |
writer.writeCharacters("\n"); |
|
235 |
} |
|
236 |
// } |
|
237 |
|
|
238 |
break; |
|
239 |
|
|
240 |
case XMLStreamConstants.CHARACTERS: |
|
241 |
//if(!isSic) |
|
242 |
if(isW) |
|
243 |
{ |
|
244 |
if(printW) |
|
245 |
{ |
|
246 |
writer.writeCharacters(parser.getText().trim()); |
|
247 |
} |
|
248 |
} |
|
249 |
else |
|
250 |
writer.writeCharacters(parser.getText().trim()); |
|
251 |
break; |
|
252 |
} |
|
253 |
} |
|
254 |
writer.flush(); |
|
255 |
writer.close(); |
|
256 |
output.close() |
|
257 |
inputData.close(); |
|
258 |
} |
|
259 |
|
|
260 |
/**
 * Counts the elements whose local name is "w" in an XML file.
 *
 * The file is read with a StAX stream reader; an element is counted
 * whatever its namespace prefix, since only the local name is compared.
 *
 * @param infile the XML file to scan
 * @return the number of "w" start tags found in the document
 */
public int countW(File infile)
{
	InputStream inputData = infile.toURI().toURL().openStream();
	try
	{
		XMLInputFactory inputfactory = XMLInputFactory.newInstance();
		XMLStreamReader parser = inputfactory.createXMLStreamReader(inputData);
		try
		{
			int count = 0;
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
			{
				if (event == XMLStreamConstants.START_ELEMENT && parser.getLocalName() == "w")
					count++;
			}
			return count;
		}
		finally
		{
			// frees the reader's own resources; it does not close the underlying stream
			parser.close();
		}
	}
	finally
	{
		// always release the file handle, even when the document is malformed
		inputData.close();
	}
}
|
285 |
|
|
286 |
/**
 * Command line entry point: extracts words from every file listed in
 * ~/xml/extract/maxfile.
 *
 * Input files are read from ~/xml/extract/ and the results are written,
 * under the same file name, to ~/xml/extract/results/.
 *
 * @param args the arguments (not used)
 */
public static void main(String[] args)
{
	String userDir = System.getProperty("user.home");

	File directory = new File(userDir+"/xml/extract/");
	File outdir = new File(userDir+"/xml/extract/","results");
	outdir.mkdir();

	File maxfile = new File(userDir+"/xml/extract/maxfile");
	/*
	 * maxfile format (tab separated):
	 *
	 * filename1.xml	45000
	 * filename2.xml	22500
	 * filename3.xml	45000
	 */
	// keyed by file NAME (a String), which is what the lookup below uses
	Map<String, Integer> maxperfile = new HashMap<String, Integer>();
	maxfile.eachLine { String line ->
		String[] split = line.split("\t");
		if (split.length == 2)
		{
			try
			{
				maxperfile.put(split[0], Integer.parseInt(split[1]));
			}
			catch (NumberFormatException e)
			{
				// keep going with the other lines, but tell the user why this file is skipped
				println "maxfile: line '"+line+"' ignored, '"+split[1]+"' is not an integer"
			}
		}
	}
	println maxperfile;

	File[] files = directory.listFiles();
	if (files == null)
	{
		// listFiles() returns null when the directory is missing or unreadable
		println "can't list the files of "+directory
		return;
	}

	for (File infile : files)
	{
		Integer max = maxperfile.get(infile.getName());
		if (max != null)
		{
			File outfile = new File(outdir, infile.getName());
			new WExtract().process(infile, outfile, max)
		}
	}
}
|
334 |
} |
tmp/org.txm.core/src/java/org/txm/importer/WExtractWithMode.groovy (revision 986) | ||
---|---|---|
1 |
// Copyright © 2010-2013 ENS de Lyon. |
|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of |
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice |
|
4 |
// Sophia Antipolis, University of Paris 3. |
|
5 |
// |
|
6 |
// The TXM platform is free software: you can redistribute it |
|
7 |
// and/or modify it under the terms of the GNU General Public |
|
8 |
// License as published by the Free Software Foundation, |
|
9 |
// either version 2 of the License, or (at your option) any |
|
10 |
// later version. |
|
11 |
// |
|
12 |
// The TXM platform is distributed in the hope that it will be |
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
15 |
// PURPOSE. See the GNU General Public License for more |
|
16 |
// details. |
|
17 |
// |
|
18 |
// You should have received a copy of the GNU General |
|
19 |
// Public License along with the TXM platform. If not, see |
|
20 |
// http://www.gnu.org/licenses. |
|
21 |
// |
|
22 |
// |
|
23 |
// |
|
24 |
// $LastChangedDate: 2011-10-19 17:50:26 +0200 (mer., 19 oct. 2011) $ |
|
25 |
// $LastChangedRevision: 2038 $ |
|
26 |
// $LastChangedBy: alavrentev $ |
|
27 |
// |
|
28 |
package org.txm.importer |
|
29 |
|
|
30 |
import javax.xml.parsers.DocumentBuilder; |
|
31 |
import javax.xml.parsers.DocumentBuilderFactory; |
|
32 |
import javax.xml.parsers.ParserConfigurationException; |
|
33 |
import javax.xml.transform.OutputKeys; |
|
34 |
import javax.xml.transform.Result; |
|
35 |
import javax.xml.transform.Source; |
|
36 |
import javax.xml.transform.Transformer; |
|
37 |
import javax.xml.transform.TransformerFactory; |
|
38 |
import javax.xml.transform.dom.DOMSource; |
|
39 |
import javax.xml.transform.stream.StreamResult; |
|
40 |
|
|
41 |
import org.w3c.dom.Document; |
|
42 |
import org.w3c.dom.Element; |
|
43 |
import org.w3c.dom.NodeList; |
|
44 |
import org.xml.sax.SAXException; |
|
45 |
|
|
46 |
import javax.xml.stream.*; |
|
47 |
import java.io.File; |
|
48 |
import java.net.URL; |
|
49 |
|
|
50 |
// TODO: Auto-generated Javadoc |
|
51 |
/** |
|
52 |
* Extract w tags from a tei file |
|
53 |
* not finished. |
|
54 |
* |
|
55 |
* @author mdecorde |
|
56 |
*/ |
|
57 |
class WExtractWithMode |
|
58 |
{ |
|
59 |
|
|
60 |
/**
 * Copies a TEI file to outfile, keeping only the "w" elements located in
 * the windows selected by modemax.
 *
 * modemax is written "mode/max" where max is the total number of words to
 * keep and mode selects the windows:
 * <ul>
 * <li>"3": beginning, middle and end of the text, max/3 words each</li>
 * <li>"2": beginning and end of the text, max/2 words each</li>
 * <li>"1a": beginning of the text, max words</li>
 * <li>"1m": middle of the text, max words</li>
 * <li>"1z": end of the text, max words</li>
 * </ul>
 * Everything found before the "text" element is copied whatever the
 * selection. Nothing is written when modemax is malformed, when the mode is
 * unknown or when the file contains fewer than max words; a message is
 * printed instead.
 *
 * NOTE(review): words are numbered from 1 but the end window stops at
 * count-1, so the very last word of the file is never kept; the middle
 * window bounds are both inclusive. Confirm whether this is intended.
 *
 * @param infile the TEI file to read
 * @param outfile the file to write
 * @param modemax the mode and the number of words to keep, e.g. "3/45000"
 * @return nothing meaningful (null)
 */
public process(File infile, File outfile, String modemax)
{
	println "Process "+infile.getName()+", keep $modemax words"
	int count = this.countW(infile);

	int max = 0
	String mode = ""

	// a malformed modemax used to be swallowed silently, leaving max at 0
	String[] fields = (modemax == null) ? new String[0] : modemax.split("/")
	if (fields.length < 2)
	{
		println "can't read '$modemax': expected mode/max, for example 3/45000"
		return;
	}
	mode = fields[0]
	try
	{
		max = Integer.parseInt(fields[1])
	}
	catch (NumberFormatException e)
	{
		println "can't read '$modemax': '"+fields[1]+"' is not an integer"
		return;
	}

	if(count < max)
	{
		println "can't extract $max words, the file "+infile.getName()+" contains only $count words"
		return;
	}

	// the divisions below are deliberately left as plain Groovy "/" (BigDecimal
	// result truncated when stored in an int) so the computed bounds do not change
	int part = 0;
	if (mode == "3")
	{
		part = max/3
	}
	else if (mode == "2")
	{
		part = max/2
	}
	else if (mode == "1a" || mode == "1m" || mode == "1z")
	{
		part = max
	}
	else
	{
		println "mode must be 1a, 1m, 1z, 2 or 3"
		return
	}

	// beginning window
	int from1 = 0
	int to1 = 0
	if (mode != "1m" && mode != "1z")
	{
		to1 = part
	}
	// middle window
	int from2 = 0
	int to2 = 0
	if (mode == "3" || mode == "1m")
	{
		from2 = (count/2) - (part/2);
		to2 =(count/2) + (part/2);
	}
	// end window
	int from3 = 0
	int to3 = 0
	if (mode != "1a" && mode != "1m")
	{
		from3 = count -part;
		to3= count-1;
	}

	// NOTE(review): isW is never set back to false, so once the first "w" has
	// been read every text node is filtered like word content. Confirm.
	boolean isW = false;
	boolean isText = false;
	int wcount=0;

	println " count : "+count
	println " get from "+from1+" to "+to1
	println " get from "+from2+" to "+to2
	println " get from "+from3+" to "+to3

	// true when the current position must be copied: always outside "text",
	// otherwise only when the word counter is inside one of the three windows
	Closure<Boolean> keep = { ->
		!isText ||
				(wcount >= from1 && wcount <= to1) ||
				(wcount >= from2 && wcount <= to2) ||
				(wcount >= from3 && wcount <= to3)
	}

	String localname;
	InputStream inputData = infile.toURI().toURL().openStream();
	try
	{
		XMLInputFactory inputfactory = XMLInputFactory.newInstance();
		XMLStreamReader parser = inputfactory.createXMLStreamReader(inputData);
		XMLOutputFactory factory = XMLOutputFactory.newInstance();

		FileOutputStream output = new FileOutputStream(outfile)
		try
		{
			XMLStreamWriter writer = factory.createXMLStreamWriter(output, "UTF-8");

			writer.writeStartDocument("utf-8", "1.0");

			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
			{
				switch (event)
				{
					case XMLStreamConstants.START_ELEMENT:
						localname = parser.getLocalName();

						if(localname == "text")
							isText = true;

						if(localname == "w")
						{
							isW= true;
							wcount++;
							// the selection is tested after the counter has moved to this word
							if(keep())
								copyStartElement(parser, writer);
						}
						else
						{
							copyStartElement(parser, writer);
						}
						break;

					case XMLStreamConstants.END_ELEMENT:
						localname =parser.getLocalName()

						if(localname != "w" || keep())
						{
							writer.writeEndElement();
							writer.writeCharacters("\n");
						}
						break;

					case XMLStreamConstants.CHARACTERS:
						if(!isW || keep())
							writer.writeCharacters(parser.getText().trim());
						break;
				}
			}
			writer.flush();
			writer.close();
			parser.close();
		}
		finally
		{
			// XMLStreamWriter.close() does not close the underlying stream
			output.close()
		}
	}
	finally
	{
		inputData.close();
	}
}

/**
 * Writes the start tag the parser is positioned on, with its attributes.
 *
 * The namespace declarations are added on "teiHeader" (me, bfm) and on
 * "TEI" (default TEI namespace) before the attributes of the element.
 *
 * @param parser the reader, positioned on a START_ELEMENT event
 * @param writer the writer receiving the tag
 */
private void copyStartElement(XMLStreamReader parser, XMLStreamWriter writer)
{
	String localname = parser.getLocalName();
	String prefix = parser.getPrefix();

	if(prefix != null && prefix.length() > 0)
		writer.writeStartElement(prefix+":"+localname);
	else
		writer.writeStartElement(localname);

	if(localname == "teiHeader")
	{
		writer.writeAttribute("xmlns:me", "http://www.menota.org/ns/1.0");
		writer.writeAttribute("xmlns:bfm", "http://bfm.ens-lsh.fr/ns/1.0");
	}

	if(localname == "TEI")
	{
		writer.writeAttribute("xmlns","http://www.tei-c.org/ns/1.0");
	}

	for(int i= 0 ; i < parser.getAttributeCount() ;i++ )
	{
		// a missing prefix may be reported as "" or as null depending on the
		// StAX implementation; both must give an unprefixed attribute name
		String attributePrefix = parser.getAttributePrefix(i);
		if(attributePrefix != null && attributePrefix.length() > 0)
			writer.writeAttribute(attributePrefix+":"+parser.getAttributeLocalName(i), parser.getAttributeValue(i));
		else
			writer.writeAttribute(parser.getAttributeLocalName(i), parser.getAttributeValue(i));
	}
}
|
299 |
|
|
300 |
/**
 * Counts the elements whose local name is "w" in an XML file.
 *
 * The file is read with a StAX stream reader; an element is counted
 * whatever its namespace prefix, since only the local name is compared.
 *
 * @param infile the XML file to scan
 * @return the number of "w" start tags found in the document
 */
public int countW(File infile)
{
	InputStream inputData = infile.toURI().toURL().openStream();
	try
	{
		XMLInputFactory inputfactory = XMLInputFactory.newInstance();
		XMLStreamReader parser = inputfactory.createXMLStreamReader(inputData);
		try
		{
			int count = 0;
			for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next())
			{
				if (event == XMLStreamConstants.START_ELEMENT && parser.getLocalName() == "w")
					count++;
			}
			return count;
		}
		finally
		{
			// frees the reader's own resources; it does not close the underlying stream
			parser.close();
		}
	}
	finally
	{
		// always release the file handle, even when the document is malformed
		inputData.close();
	}
}
|
325 |
|
|
326 |
/** |
|
327 |
* The main method. |
|
328 |
* |
|
329 |
* @param args the arguments |
|
330 |
*/ |
|
331 |
public static void main(String[] args) |
|
332 |
{ |
|
333 |
String userDir = System.getProperty("user.home"); |
|
334 |
|
|
335 |
File directory = new File(userDir+"/xml/extract/"); |
|
336 |
File outdir = new File(userDir+"/xml/extract/","results"); |
|
337 |
outdir.mkdir(); |
|
338 |
|
|
339 |
File maxfilemode = new File(userDir+"/xml/extract/maxfilemode"); |
|
340 |
/* |
|
341 |
* maxfilemode format: |
|
342 |
* |
|
343 |
* filename1.xml 3 45000 |
|
344 |
* filename2.xml 1a 15000 |
|
345 |
* filename3.xml 1m 15000 |
|
346 |
* filename4.xml 1z 15000 |
|
347 |
* filename5.xml 2 22500 |
Formats disponibles : Unified diff