Révision 3742
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/annotation/SampleWordsMacro.groovy (revision 3742) | ||
---|---|---|
1 |
// Copyright © 2021 ENS Lyon |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
package org.txm.macro.annotation |
|
7 |
|
|
8 |
// STANDARD DECLARATIONS |
|
9 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
10 |
import org.txm.searchengine.cqp.corpus.* |
|
11 |
import org.txm.xml.DOMIdentityHook |
|
12 |
import org.txm.xml.LocalNameHookActivator |
|
13 |
import org.txm.xml.XMLProcessor |
|
14 |
import org.txm.objects.* |
|
15 |
import org.txm.rcp.utils.JobHandler |
|
16 |
import org.txm.rcp.views.corpora.CorporaView |
|
17 |
import org.eclipse.core.runtime.Status |
|
18 |
import org.txm.annotation.kr.core.KRAnnotationEngine |
|
19 |
import org.txm.annotation.kr.core.repository.* |
|
20 |
import org.txm.annotation.kr.rcp.commands.SaveAnnotations |
|
21 |
import org.txm.annotation.kr.rcp.concordance.WordAnnotationToolbar |
|
22 |
import org.txm.importer.StaxIdentityParser |
|
23 |
|
|
24 |
// Macro body: for every text of the selected main corpus, copy its XML-TXM
// file to outputDirectory while keeping only the first nWordsPerText <w>
// elements. Result files in which no word was seen are deleted.
def scriptName = this.class.getSimpleName()

// The macro only works on a MainCorpus selected in the Corpus view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName: please select a Corpus in the Corpus view."
	return 0
}

MainCorpus mcorpus = corpusViewSelection
Project project = mcorpus.getProject()
// NOTE(review): txmDir is computed but not used below - confirm whether the
// truncated files were meant to be written there.
File txmDir = new File(project.getProjectDirectory(), "txm/"+mcorpus.getName())

// NOTE(review): @Field and @Option are not imported in the visible header of
// this file - confirm they are brought into scope elsewhere.
// Declared with 'def' (not File): the widget is "Integer" and the value is
// compared numerically with the word counter.
@Field @Option(name="nWordsPerText", usage="the number of words per text", widget="Integer", required=true, def="")
def nWordsPerText

@Field @Option(name="debug", usage="Afficher les messages de debug", widget="Boolean", required=true, def="false")
def debug

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// NOTE(review): outputDirectory is not declared in this script; presumably it
// is supplied by the script binding or needs its own @Field @Option - confirm.
if (!outputDirectory.exists()) {
	println "Error: output directory not found: "+outputDirectory
	return false
}

project.compute();

for (Text text : project.getChildren(Text.class)) {
	println "Text: "+text

	File orig = text.getXMLTXMFile()
	if ((orig == null) || !orig.exists()) {
		println "** Error: no XML-TXM file found for "+text
		continue // next ! (skip this text only, keep processing the others)
	}

	File result = new File(outputDirectory, orig.getName())

	// Up-to-date check deliberately disabled by the leading 'false'.
	if (false && result.lastModified() > orig.lastModified()) {
		println "Skipping $text: result file is more recent: $result"
		continue // next !
	}

	XMLProcessor xp = new XMLProcessor(orig);
	LocalNameHookActivator activator = new LocalNameHookActivator("w")

	// Word counter, reset for each text and incremented by the hook below.
	nWords = 0
	new DOMIdentityHook("word", activator, xp) {
		@Override
		public void processDom() {
			//println dom
			use(groovy.xml.dom.DOMCategory) {
				nWords++
				if (nWords > nWordsPerText) {
					// Quota exceeded: presumably a null dom makes the hook
					// drop this <w> from the output - confirm in DOMIdentityHook.
					dom = null
				}
			}
		}
	}
	xp.process(result)

	// No <w> element was seen in this text: remove the result file.
	if (nWords == 0) {
		println "Removing empty <text>: $text: $result"
		result.delete()
	}
}

println "Done: update the corpus using the 'Corpus > Update' main menu to see the modifications in commands."
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/corpus/TruncateTextsAtFirstWordsMacro.groovy (revision 3742) | ||
---|---|---|
1 |
// Copyright © 2021 ENS Lyon |
|
2 |
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html) |
|
3 |
// @author mdecorde |
|
4 |
// @author sheiden |
|
5 |
|
|
6 |
package org.txm.macro.corpus |
|
7 |
|
|
8 |
// STANDARD DECLARATIONS |
|
9 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
10 |
import org.txm.searchengine.cqp.corpus.* |
|
11 |
import org.txm.xml.DOMIdentityHook |
|
12 |
import org.txm.xml.LocalNameHookActivator |
|
13 |
import org.txm.xml.XMLProcessor |
|
14 |
import org.txm.objects.* |
|
15 |
import org.txm.rcp.utils.JobHandler |
|
16 |
import org.txm.rcp.views.corpora.CorporaView |
|
17 |
import org.eclipse.core.runtime.Status |
|
18 |
import org.txm.annotation.kr.core.KRAnnotationEngine |
|
19 |
import org.txm.annotation.kr.core.repository.* |
|
20 |
import org.txm.annotation.kr.rcp.commands.SaveAnnotations |
|
21 |
import org.txm.annotation.kr.rcp.concordance.WordAnnotationToolbar |
|
22 |
import org.txm.importer.StaxIdentityParser |
|
23 |
|
|
24 |
// Macro body: for every text of the selected main corpus, copy its XML-TXM
// file to outputDirectory while keeping only the first nWordsPerText <w>
// elements. Result files in which no word was seen are deleted.
def scriptName = this.class.getSimpleName()

// The macro only works on a MainCorpus selected in the Corpus view.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName: please select a Corpus in the Corpus view."
	return 0
}

MainCorpus mcorpus = corpusViewSelection
Project project = mcorpus.getProject()
// NOTE(review): txmDir is computed but not used below - confirm whether the
// truncated files were meant to be written there.
File txmDir = new File(project.getProjectDirectory(), "txm/"+mcorpus.getName())

// NOTE(review): @Field and @Option are not imported in the visible header of
// this file - confirm they are brought into scope elsewhere.
// Declared with 'def' (not File): the widget is "Integer" and the value is
// compared numerically with the word counter.
@Field @Option(name="nWordsPerText", usage="the number of words per text", widget="Integer", required=true, def="")
def nWordsPerText

@Field @Option(name="debug", usage="Afficher les messages de debug", widget="Boolean", required=true, def="false")
def debug

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return

// NOTE(review): outputDirectory is not declared in this script; presumably it
// is supplied by the script binding or needs its own @Field @Option - confirm.
if (!outputDirectory.exists()) {
	println "Error: output directory not found: "+outputDirectory
	return false
}

project.compute();

for (Text text : project.getChildren(Text.class)) {
	println "Text: "+text

	File orig = text.getXMLTXMFile()
	if ((orig == null) || !orig.exists()) {
		println "** Error: no XML-TXM file found for "+text
		continue // next ! (skip this text only, keep processing the others)
	}

	File result = new File(outputDirectory, orig.getName())

	// Up-to-date check deliberately disabled by the leading 'false'.
	if (false && result.lastModified() > orig.lastModified()) {
		println "Skipping $text: result file is more recent: $result"
		continue // next !
	}

	XMLProcessor xp = new XMLProcessor(orig);
	LocalNameHookActivator activator = new LocalNameHookActivator("w")

	// Word counter, reset for each text and incremented by the hook below.
	nWords = 0
	new DOMIdentityHook("word", activator, xp) {
		@Override
		public void processDom() {
			//println dom
			use(groovy.xml.dom.DOMCategory) {
				nWords++
				if (nWords > nWordsPerText) {
					// Quota exceeded: presumably a null dom makes the hook
					// drop this <w> from the output - confirm in DOMIdentityHook.
					dom = null
				}
			}
		}
	}
	xp.process(result)

	// No <w> element was seen in this text: remove the result file.
	if (nWords == 0) {
		println "Removing empty <text>: $text: $result"
		result.delete()
	}
}

println "Done: update the corpus using the 'Corpus > Update' main menu to see the modifications in commands."
Formats disponibles : Unified diff