Révision 3742

TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/annotation/SampleWordsMacro.groovy (revision 3742)
1
// Copyright © 2021 ENS Lyon
2
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html)
3
// @author mdecorde
4
// @author sheiden
5

  
6
package org.txm.macro.annotation
7

  
8
// STANDARD DECLARATIONS
9
import org.txm.rcpapplication.swt.widget.parameters.*
10
import org.txm.searchengine.cqp.corpus.*
11
import org.txm.xml.DOMIdentityHook
12
import org.txm.xml.LocalNameHookActivator
13
import org.txm.xml.XMLProcessor
14
import org.txm.objects.*
15
import org.txm.rcp.utils.JobHandler
16
import org.txm.rcp.views.corpora.CorporaView
17
import org.eclipse.core.runtime.Status
18
import org.txm.annotation.kr.core.KRAnnotationEngine
19
import org.txm.annotation.kr.core.repository.*
20
import org.txm.annotation.kr.rcp.commands.SaveAnnotations
21
import org.txm.annotation.kr.rcp.concordance.WordAnnotationToolbar
22
import org.txm.importer.StaxIdentityParser
23

  
24
// Simple class name of this script, used to prefix console messages.
def scriptName = this.class.getSimpleName()

// The macro only works on a main corpus: stop when the Corpus view selection is anything else.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName: please select a Corpus in the Corpus view."
	return 0
}

MainCorpus mcorpus = corpusViewSelection
Project project = mcorpus.getProject()

// NOTE(review): no import for @Field / @Option is visible in this file's import list --
// confirm that they resolve (presumably groovy.transform.Field and the args4j Option annotation).

// Directory receiving the truncated XML-TXM files. It was used below without ever being declared.
@Field @Option(name="outputDirectory", usage="the directory where the result files are written", widget="Folder", required=true, def="")
File outputDirectory

// Maximum number of words kept per text. Declared untyped (not File) because the value comes
// from an "Integer" widget and is compared with the word counter.
@Field @Option(name="nWordsPerText", usage="the number of words per text", widget="Integer", required=true, def="")
def nWordsPerText

@Field @Option(name="debug", usage="Afficher les messages de debug", widget="Boolean", required=true, def="false")
def debug

// Open the parameters input dialog box; stop when it reports failure/cancellation.
if (!ParametersDialog.open(this)) return

// The result files are written into outputDirectory, so it must already exist.
if (outputDirectory == null || !outputDirectory.exists()) {
	println "Error: output directory not found: "+outputDirectory
	return false
}
51

  
52
// NOTE(review): presumably brings the project up to date before its texts are read -- confirm.
project.compute();

// Write, for each text of the project, a copy of its XML-TXM file into outputDirectory
// in which only the first nWordsPerText <w> elements are left untouched.
for (Text text : project.getChildren(Text.class)) {
	println "Text: "+text

	File orig = text.getXMLTXMFile()
	if ((orig == null) || !orig.exists()) {
		println "** Error: no XML-TXM file found for "+text
		continue // next ! (a 'return' here would abort the whole macro on the first missing file)
	}

	// The result file keeps the name of the source file.
	File result = new File(outputDirectory, orig.getName())

	XMLProcessor xp = new XMLProcessor(orig);
	LocalNameHookActivator activator = new LocalNameHookActivator("w")

	// Word counter of the current text. It is left undeclared (script binding variable)
	// so that the anonymous hook below can increment it.
	nWords = 0
	new DOMIdentityHook("word", activator, xp) {
		@Override
		public void processDom() {
			use(groovy.xml.dom.DOMCategory) {
				nWords++
				if (nWords >  nWordsPerText) {
					// NOTE(review): presumably a null dom makes the hook drop the element
					// from the output -- confirm against DOMIdentityHook.
					dom = null
				}
			}
		}
	}
	xp.process(result)

	// No <w> element was met in this text: do not keep its result file.
	if (nWords == 0) {
		println "Removing empty <text>: $text: $result"
		result.delete()
	}
}

println "Done: update the corpus using the 'Corpus > Update' main menu to see the modifications in commands."
TXM/trunk/bundles/org.txm.groovy.core/src/groovy/org/txm/macro/corpus/TruncateTextsAtFirstWordsMacro.groovy (revision 3742)
1
// Copyright © 2021 ENS Lyon
2
// Licensed under the terms of the GNU General Public License version 3 (http://www.gnu.org/licenses/gpl-3.0.html)
3
// @author mdecorde
4
// @author sheiden
5

  
6
package org.txm.macro.corpus
7

  
8
// STANDARD DECLARATIONS
9
import org.txm.rcpapplication.swt.widget.parameters.*
10
import org.txm.searchengine.cqp.corpus.*
11
import org.txm.xml.DOMIdentityHook
12
import org.txm.xml.LocalNameHookActivator
13
import org.txm.xml.XMLProcessor
14
import org.txm.objects.*
15
import org.txm.rcp.utils.JobHandler
16
import org.txm.rcp.views.corpora.CorporaView
17
import org.eclipse.core.runtime.Status
18
import org.txm.annotation.kr.core.KRAnnotationEngine
19
import org.txm.annotation.kr.core.repository.*
20
import org.txm.annotation.kr.rcp.commands.SaveAnnotations
21
import org.txm.annotation.kr.rcp.concordance.WordAnnotationToolbar
22
import org.txm.importer.StaxIdentityParser
23

  
24
// Simple class name of this script, used to prefix console messages.
def scriptName = this.class.getSimpleName()

// The macro only works on a main corpus: stop when the Corpus view selection is anything else.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "** $scriptName: please select a Corpus in the Corpus view."
	return 0
}

MainCorpus mcorpus = corpusViewSelection
Project project = mcorpus.getProject()

// NOTE(review): no import for @Field / @Option is visible in this file's import list --
// confirm that they resolve (presumably groovy.transform.Field and the args4j Option annotation).

// Directory receiving the truncated XML-TXM files. It was used below without ever being declared.
@Field @Option(name="outputDirectory", usage="the directory where the result files are written", widget="Folder", required=true, def="")
File outputDirectory

// Maximum number of words kept per text. Declared untyped (not File) because the value comes
// from an "Integer" widget and is compared with the word counter.
@Field @Option(name="nWordsPerText", usage="the number of words per text", widget="Integer", required=true, def="")
def nWordsPerText

@Field @Option(name="debug", usage="Afficher les messages de debug", widget="Boolean", required=true, def="false")
def debug

// Open the parameters input dialog box; stop when it reports failure/cancellation.
if (!ParametersDialog.open(this)) return

// The result files are written into outputDirectory, so it must already exist.
if (outputDirectory == null || !outputDirectory.exists()) {
	println "Error: output directory not found: "+outputDirectory
	return false
}
51

  
52
// NOTE(review): presumably brings the project up to date before its texts are read -- confirm.
project.compute();

// Write, for each text of the project, a copy of its XML-TXM file into outputDirectory
// in which only the first nWordsPerText <w> elements are left untouched.
for (Text text : project.getChildren(Text.class)) {
	println "Text: "+text

	File orig = text.getXMLTXMFile()
	if ((orig == null) || !orig.exists()) {
		println "** Error: no XML-TXM file found for "+text
		continue // next ! (a 'return' here would abort the whole macro on the first missing file)
	}

	// The result file keeps the name of the source file.
	File result = new File(outputDirectory, orig.getName())

	XMLProcessor xp = new XMLProcessor(orig);
	LocalNameHookActivator activator = new LocalNameHookActivator("w")

	// Word counter of the current text. It is left undeclared (script binding variable)
	// so that the anonymous hook below can increment it.
	nWords = 0
	new DOMIdentityHook("word", activator, xp) {
		@Override
		public void processDom() {
			use(groovy.xml.dom.DOMCategory) {
				nWords++
				if (nWords >  nWordsPerText) {
					// NOTE(review): presumably a null dom makes the hook drop the element
					// from the output -- confirm against DOMIdentityHook.
					dom = null
				}
			}
		}
	}
	xp.process(result)

	// No <w> element was met in this text: do not keep its result file.
	if (nWords == 0) {
		println "Removing empty <text>: $text: $result"
		result.delete()
	}
}

println "Done: update the corpus using the 'Corpus > Update' main menu to see the modifications in commands."

Formats disponibles : Unified diff