Révision 3288

TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/URSUnits2CQPStructMacro.groovy (revision 3288)
1
// STANDARD DECLARATIONS
2
package org.txm.macro.urs.prototypes.cqp
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.annotation.urs.*
7
import org.txm.importer.ValidateXml
8
import org.txm.rcp.swt.widget.parameters.*
9
import org.txm.searchengine.cqp.corpus.*
10
import visuAnalec.elements.*
11

  
12
// BEGINNING OF PARAMETERS
13

  
14
// Open the parameters input dialog box
15
//if (!ParametersDialog.open(this)) return;
16

  
17
// The macro rewrites the XML-TXM files of the selection: only a main corpus is accepted
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Selection must be a Corpus"
	return
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

def texts = corpus.getCorpusTextIdsList();
def texts_startlimits = corpus.getTextStartLimits()
def texts_endlimits = corpus.getTextEndLimits()

// Moves the rejected copy to the "error" directory of the project so that it can be inspected
def moveToErrorDirectory = { File xmltxmFile, File xmltxmFileCopy ->
	File error = new File(corpus.getProject().getProjectDirectory(), "error/"+xmltxmFile.getName())
	error.getParentFile().mkdirs()
	println "	moving created file to $error"
	error.delete()
	xmltxmFileCopy.renameTo(error)
}

// Replaces the XML-TXM file with its validated copy, then flags the units as written and saves the URS corpus.
// Files.move throws if the replacement fails, so the units are never flagged when the file was not updated.
def commitUnits = { File xmltxmFile, File xmltxmFileCopy, List writtenUnits ->
	java.nio.file.Files.move(xmltxmFileCopy.toPath(), xmltxmFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
	for (Unite unit : writtenUnits) {
		unit.getProps()["written"] = "true"
	}
	URSCorpora.saveCorpus(corpus);
}

for (int i = 0 ; i < texts.size() ; i++) {

	println "Processing annotations of "+texts[i]+"..."

	def text_id = texts[i]
	def text_start = texts_startlimits[i]
	def text_end = texts_endlimits[i]

	File xmltxmFile = new File(corpus.getProject().getProjectDirectory(), "txm/"+corpus.getID()+"/"+text_id+".xml")
	File xmltxmFileCopy = new File(corpus.getProject().getProjectDirectory(), text_id+"_copy.xml")

	if (!xmltxmFile.exists()) {
		println "Warning: no text file found: "+xmltxmFile
		continue
	}

	// Selection shared by the two passes: the unit has a non-blank "type", lies inside the
	// limits of the current text and has not already been flagged as written
	def isWritable = { Unite u ->
		u.getProp("type") != null && u.getProp("type").trim().length() > 0 && text_start <= u.getDeb() && u.getFin() < text_end && !("true".equals(u.getProp("written")))
	}

	// WRITE MILESTONES UNITS

	println "-> MILESTONES UNITS"

	// the milestones of all the unit types are written in a single pass over the file
	def units = []
	for (String unit_type : analecCorpus.getStructure().getTypes(Unite.class)) {
		units.addAll(analecCorpus.getUnites(unit_type).findAll() { isWritable(it) && "yes".equals(it.getProp("milestone")) })
	}

	if (units.size()== 0) {
		println "No milestones to write"
	} else {
		try {
			println "processing milestones Units ${text_id} and its units "+units.size()
			MileStoneInserter inserter = new MileStoneInserter(corpus, xmltxmFile, units);
			if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				commitUnits(xmltxmFile, xmltxmFileCopy, units)
				println "Done, "+units.size()+ " milestones written"
			} else {
				println "Error while processing milestones $xmltxmFile file"
				moveToErrorDirectory(xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing milestones $xmltxmFile file: "+e
			moveToErrorDirectory(xmltxmFile, xmltxmFileCopy)
		}
	}

	// WRITE NON MILESTONES UNITS
	println "-> OTHER UNITS"

	// one pass over the file per unit type
	for (String unit_type : analecCorpus.getStructure().getTypes(Unite.class)) {

		// every unit that is not declared as a milestone (milestone="no" or no milestone property)
		def corpus_units = analecCorpus.getUnites(unit_type).findAll() { isWritable(it) && !"yes".equals(it.getProp("milestone")) }

		if (corpus_units.size() == 0) continue;

		println "processing Units ${text_id} and its $unit_type units "+corpus_units.size()
		try {
			UnitsInserter inserter2 = new UnitsInserter(corpus, xmltxmFile, corpus_units, unit_type);
			if (inserter2.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				commitUnits(xmltxmFile, xmltxmFileCopy, corpus_units)
				println "Done, "+corpus_units.size()+ " units written"
			} else {
				println "Error while processing units $xmltxmFile file"
				moveToErrorDirectory(xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing units $xmltxmFile file: "+e
			moveToErrorDirectory(xmltxmFile, xmltxmFileCopy)
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/WordUnitsInserter.groovy (revision 3288)
1
package org.txm.macro.urs.prototypes.cqp
2

  
3
import java.io.IOException
4
import java.util.Date
5
import java.util.LinkedHashMap
6

  
7
import javax.xml.stream.XMLStreamException
8

  
9
import org.txm.Toolbox
10
import org.txm.importer.StaxIdentityParser
11
import org.txm.macro.urs.AnalecUtils
12
import org.txm.scripts.importer.GetAttributeValue
13
import org.txm.searchengine.cqp.CQPSearchEngine
14
import org.txm.searchengine.cqp.corpus.MainCorpus
15
import visuAnalec.elements.Unite
16

  
17
/**
 * Identity StAX parser that copies an XML-TXM file and adds the properties of URS units
 * to the words (w elements) of the units, as txm:ana elements.
 *
 * For each word, the values of a property coming from several units are concatenated
 * with a space. If the word already has an ana element of the same type, its value is
 * appended after the unit values and the "resp" attribute is set to "#txm".
 * Properties without an existing ana element are written as new txm:ana elements just
 * before the end tag of the word.
 */
public class WordUnitsInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	/** word id -> units whose properties must be written in the word */
	def id2Units = [:]
	def unit_properties // "*" or list (String)
	def unit_word // ALL START END
	/** values to write for the current word: property name -> value */
	LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>();

	/**
	 * @param corpus the corpus used to resolve the word positions into word ids (CQP "id" property)
	 * @param inputFile the XML-TXM file to read
	 * @param units the units to write; the list is sorted in place by start then end position
	 * @param unit_properties "*" to write all the properties of the units, or a comma-separated list of property names
	 * @param unit_word "START": only the first word of each unit is annotated, "END": only the last word, any other value: all the words of the unit
	 */
	public WordUnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String unit_properties, String unit_word) {
		super(inputFile)
		this.inputFile = inputFile

		this.unit_properties = unit_properties
		this.unit_word = unit_word
		if (!("*".equals(unit_properties))) {
			// "a, b" must select the "a" and "b" properties: trim the names and drop the empty ones
			this.unit_properties = unit_properties.split(",").collect { it.trim() }.findAll { it.length() > 0 }
		}

		this.units = units
		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

		for (int i = 0 ; i < units.size() ; i++) {
			Unite u = units[i]
			int[] positions = null;
			if ("START".equals(unit_word)) {
				positions = [u.getDeb()] as int[]
			} else if ("END".equals(unit_word)) {
				positions = [u.getFin()] as int[]
			} else {
				positions = u.getDeb()..u.getFin()
			}

			ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions)

			for (String id : ids) {
				if (id == null) continue // no word id for this position: nothing can be matched in the XML file
				if (!id2Units.containsKey(id)) {
					id2Units[id] = []
				}
				id2Units[id] << u
			}
		}
	}

	boolean inAna = false
	String ana_type, ana_resp, ana_value
	boolean inW = false
	String word_id = null

	/**
	 * On a word start: collects the unit values to write for this word.
	 * On an ana start inside a word: starts buffering the ana element (it is written when it ends).
	 * Any other element is copied as is.
	 */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		if ("w".equals(localname)) {

			inW = true
			word_id = this.getParserAttributeValue("id")

			if (id2Units.containsKey(word_id)) {
				for (Unite u : id2Units[word_id]) {
					def props = u.getProps()

					def propNames = props.keySet();
					if (!("*".equals(unit_properties))) {
						propNames = unit_properties
					}

					for (String p : propNames) {
						// technical properties are not word annotations
						if ("type".equals(p)) continue;
						if ("written".equals(p)) continue;
						if ("milestone".equals(p)) continue;

						def value = props.get(p)
						if (value == null) continue; // property not set on this unit: do not write the "null" text

						if (!anaValues.containsKey(p)) {
							anaValues[p] = ""
							ana_resp = "#txm"
						}
						anaValues[p] = (anaValues[p]+" "+value).trim()
					}
				}
				if (anaValues.size() > 0) println anaValues
			}

			super.processStartElement(); // write the tag

		} else if ("ana".equals(localname) && inW) {

			inAna = true
			ana_type = this.getParserAttributeValue("type").substring(1) // drop the leading character of the type ("#" when written by this class)
			ana_resp = this.getParserAttributeValue("resp")
			ana_value = ""

		} else {
			super.processStartElement()
		}
	}

	/** Buffers the text of the current ana element; any other text is copied as is. */
	@Override
	public void processCharacters() throws XMLStreamException {
		if (inAna) {
			ana_value += parser.getText().trim()
		} else {
			super.processCharacters()
		}
	}

	/**
	 * On an ana end inside a word: writes the ana element, merged with the unit value of the same type if any.
	 * On a word end: writes the unit values that did not match an existing ana element, then closes the word.
	 */
	@Override
	protected void processEndElement() throws XMLStreamException {

		if ("w".equals(localname)) {

			// write the last values
			for (String ana_type : anaValues.keySet()) {
				writer.writeStartElement("txm:ana")
				writer.writeAttribute("type", "#" + ana_type)
				writer.writeAttribute("resp", "#txm") // change
				writer.writeCharacters(anaValues[ana_type])
				writer.writeEndElement()
			}

			anaValues.clear()
			super.processEndElement() // finally write word then close annotations
			inW = false

		} else if ("ana".equals(localname) && inW) {

			if (!anaValues.containsKey(ana_type)) {
				anaValues[ana_type] = ana_value.trim()
			} else {
				ana_resp = "#txm" // set the resp to txm since anaValues update the ana value
				anaValues[ana_type] = (anaValues[ana_type]+" "+ana_value.trim()).trim()
			}

			String value = anaValues[ana_type]

			writer.writeStartElement("txm:ana")
			writer.writeAttribute("type", "#" + ana_type)
			writer.writeAttribute("resp", ana_resp) // change
			writer.writeCharacters(value)
			writer.writeEndElement()

			// the value is written: it must not be written again at the end of the word
			anaValues.remove(ana_type)

			inAna = false
			ana_type = null
			ana_resp = null
			ana_value = null

		} else {
			super.processEndElement()
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/MileStoneInserter.groovy (revision 3288)
1
package org.txm.macro.urs.prototypes.cqp
2

  
3
import java.io.IOException
4

  
5
import javax.xml.stream.XMLStreamException
6

  
7
import org.txm.importer.StaxIdentityParser
8
import org.txm.macro.urs.AnalecUtils
9
import org.txm.searchengine.cqp.CQPSearchEngine
10
import org.txm.searchengine.cqp.corpus.MainCorpus
11
import visuAnalec.elements.Unite
12

  
13
/**
 * Identity StAX parser that copies an XML-TXM file and writes URS units as elements
 * without content (milestones) next to the word found at the start position of each unit.
 *
 * The "position" property of a unit selects where its element is written:
 * "before" the start tag of the word or "after" its end tag.
 * Words are only processed once a "text" element has been opened.
 */
public class MileStoneInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	/** word id -> units starting at this word */
	def id2Units = [:]

	/**
	 * @param corpus the corpus used to resolve the unit start positions into word ids (CQP "id" property)
	 * @param inputFile the XML-TXM file to read
	 * @param units the units to write; the list is sorted in place by start then end position
	 */
	public MileStoneInserter(MainCorpus corpus, File inputFile, List<Unite> units) {
		super(inputFile);
		this.inputFile = inputFile;

		this.units = units;
		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

		int[] positions = new int[units.size()];
		for( int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getDeb();
		}

		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions);

		// ids[i] is the id of the word at the start position of units[i]
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]
			if (id != null) {
				if (!id2Units.containsKey(id)) id2Units[id] = []

				id2Units[id] << units[i]
			}
		}
	}

	boolean start = false;
	String word_id = null;

	/** Writes the "before" milestones of a word just before copying its start tag. */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		if ("text".equals(localname)) {
			start = true;
		} else if ("w".equals(localname) && start) {
			word_id = getParserAttributeValue("id");
			if (word_id == null) {
				println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
			} else {
				writeAllUnits(word_id, "before")
			}
		}

		super.processStartElement();
	}

	/**
	 * Writes the milestones of a word for one position.
	 * The element name is the "type" property of the unit; all the other properties are written as attributes.
	 *
	 * @param id the word id
	 * @param position the value of the "position" property of the units to write ("before" or "after")
	 */
	protected void writeAllUnits(String id, String position) {

		def units = id2Units[id]
		if (units == null) return; // no units to write

		for (Unite currentUnit : units) {

			// skip only this unit: the next units of the word may have the requested position
			if (!position.equals(currentUnit.getProp("position"))) continue;

			writer.writeStartElement(currentUnit.getProp("type"));
			HashMap props = currentUnit.getProps();
			for (String p : props.keySet()) {
				if (p.equals("type")) continue; // ignore the type since written in tag name
				writer.writeAttribute(p, ""+props.get(p));
			}
			writer.writeEndElement();
		}
	}

	/** Writes the "after" milestones of a word just after copying its end tag. */
	@Override
	protected void processEndElement() throws XMLStreamException {
		super.processEndElement();

		if ("w".equals(localname) && start && word_id != null) {
			writeAllUnits(word_id, "after")
		}

		if ("w".equals(localname)) {
			word_id = null;
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/UnitsInserter.groovy (revision 3288)
1
package org.txm.macro.urs.prototypes.cqp
2

  
3
import java.io.IOException
4

  
5
import javax.xml.stream.XMLStreamException
6

  
7
import org.txm.importer.StaxIdentityParser
8
import org.txm.macro.urs.AnalecUtils
9
import org.txm.searchengine.cqp.CQPSearchEngine
10
import org.txm.searchengine.cqp.corpus.MainCorpus
11
import visuAnalec.elements.Unite
12

  
13
/**
 * Identity StAX parser that copies an XML-TXM file and writes URS units as elements
 * surrounding words: the element of a unit is opened before the word found at its start
 * position and closed after the word found at its end position.
 *
 * The element name is the "type" property of the unit; all the other properties are
 * written as attributes. Words are only processed once a "text" element has been opened.
 */
public class UnitsInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	/** word id -> units to open before this word */
	def open_id2Units = [:]
	/** word id -> units to close after this word */
	def close_id2Units = [:]
	/** units whose element is currently open */
	def writing_units = []
	/** parallel to writing_units (same indexes) */
	def writing_stacks = []

	/** path of the currently open input elements, e.g. "/TEI/text/p/w" */
	def stack = "";

	def writing_start, writing_end;

	def positions2id = [:] // used to relocate end of units

	/**
	 * @param corpus the corpus used to resolve the unit positions into word ids (CQP "id" property)
	 * @param inputFile the XML-TXM file to read
	 * @param units the units to write; the list is sorted in place by start position, then by decreasing end position
	 * @param type not used by this class
	 */
	public UnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String type) {
		super(inputFile);
		this.inputFile = inputFile;
		this.units = units;

		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: b.getFin() <=> a.getFin() }

		// get words id limits
		int[] positions = new int[units.size()];
		for( int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getDeb();
		}
		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions);
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]

			if (id != null) {
				positions2id[positions[i]] = id
				if (!open_id2Units.containsKey(id)) open_id2Units[id] = []
				open_id2Units[id] << units[i]
			}
		}

		positions = new int[units.size()];
		for( int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getFin();
		}
		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions);
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]
			if (id != null) {
				positions2id[positions[i]] = id
				if (!close_id2Units.containsKey(id)) close_id2Units[id] = new HashSet<Unite>()
				close_id2Units[id] << units[i]
			}
		}
	}

	boolean start = false;
	String word_id = null;

	/** Opens the units starting at a word just before copying the start tag of the word. */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		stack += "/"+localname

		if ("text".equals(localname)) {
			start = true;
		} else if ("w".equals(localname) && start) {
			word_id = getParserAttributeValue("id");
			if (word_id == null) {
				println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
			} else {
				writeOpenUnits()
			}
		}

		super.processStartElement();
	}

	/**
	 * Opens the units starting at the current word.
	 * A unit ending after an already open unit is also registered to be closed at the end
	 * word of that open unit, so that the written elements do not overlap.
	 */
	protected void writeOpenUnits() {

		def toWrite = open_id2Units[word_id]

		if (toWrite != null) {
			for (Unite unite : toWrite) {

				for (int i = 0 ; i < writing_units.size() ; i++) {
					Unite u = writing_units.get(i);
					if (unite.getFin() > u.getFin()) {
						// add unite to close_id2Units
						String id = positions2id[u.getFin()]
						if (id == null) continue; // the end word of the open unit is unknown: the close cannot be relocated
						if (!close_id2Units.containsKey(id)) close_id2Units[id] = new HashSet<Unite>()
						close_id2Units[id] << unite // close the unite at the same moment
					}
				}

				writeUnit(unite);
			}
		}
	}

	/**
	 * Closes open units.
	 * With a current word: closes the open units registered to close at this word.
	 * Without a current word: closes the open units whose writing_stacks entry equals the current stack.
	 */
	protected void writeCloseUnits() {
		if (word_id != null) {
			def toClose = close_id2Units[word_id]
			if (toClose != null) {
				for (int i = 0 ; i < writing_units.size() ; i++) {
					Unite u = writing_units.get(i);
					if (toClose.contains(u)) {
						writing_stacks.remove(i)
						writing_units.remove(i)
						writer.writeEndElement();
						i--; // the lists shifted: stay on the same index
					}
				}
			}
		} else {
			for (int i = 0 ; i < writing_stacks.size() ; i++) {
				if (writing_stacks[i].equals(stack)) {
					writing_stacks.remove(i)
					writing_units.remove(i)
					writer.writeEndElement();
					i-- // the lists shifted: stay on the same index
				}
			}
		}
	}

	/**
	 * Writes the start tag of a unit and registers the unit as open.
	 *
	 * @param currentUnit the unit to open
	 */
	protected void writeUnit(Unite currentUnit) {

		writing_units << currentUnit
		// NOTE(review): the unit itself is stored here, while writing_stacks entries are compared
		// with the "stack" String elsewhere in this class; storing "stack" may have been intended - TODO confirm
		writing_stacks << currentUnit

		// the start tag is always written: the attributes below need an open start tag
		writer.writeStartElement(currentUnit.getProp("type"));
		HashMap props = currentUnit.getProps();
		for (String p : props.keySet()) {
			if (p.equals("type")) continue; // ignore the type since written in tag name
			writer.writeAttribute(p, ""+props.get(p));
		}
	}

	/** Copies the end tag, then closes the units ending at the word that has just been closed. */
	@Override
	protected void processEndElement() throws XMLStreamException {

//		println "writing_stacks=$writing_stacks"
//		println "stack=$stack"

		if (writing_stacks.size() > 0 && writing_stacks[-1].equals(stack)) {
			writeCloseUnits()
		}

		super.processEndElement();

		// remove "/localname" from the end of the path
		stack = stack.substring(0, stack.length() - localname.length() - 1);

		if ("w".equals(localname)) {
			if (start && word_id != null) {
				writeCloseUnits()
			}
			word_id = null;
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/cqp/URSUnits2CQPWordsMacro.groovy (revision 3288)
1
package org.txm.macro.urs.prototypes.cqp
2

  
3
import org.kohsuke.args4j.*
4
import groovy.transform.Field
5
import org.txm.annotation.urs.*
6
import org.txm.importer.ValidateXml
7
import org.txm.rcp.swt.widget.parameters.*
8
import org.txm.searchengine.cqp.corpus.*
9
import visuAnalec.elements.*
10

  
11
// BEGINNING OF PARAMETERS
12

  
13
// Open the parameters input dialog box
14
//if (!ParametersDialog.open(this)) return;
15

  
16
// The macro rewrites the XML-TXM files of the selection: only a main corpus is accepted
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Selection must be a Corpus"
	return
}

@Field @Option(name="unit_type", usage="Value of the 'type' property of the URS units to write", widget="String", required=true, def="word")
String unit_type

@Field @Option(name="unit_properties", usage="Comma-separated names of the unit properties to write, or * for all the properties", widget="String", required=true, def="*")
String unit_properties

@Field @Option(name="unit_word", usage="Words receiving the properties: START (first word of the unit), END (last word) or ALL (all the words)", widget="StringArray", metaVar="START	END	ALL", required=true, def="START")
String unit_word

if (!ParametersDialog.open(this)) return

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

def texts = corpus.getCorpusTextIdsList();
def texts_startlimits = corpus.getTextStartLimits()
def texts_endlimits = corpus.getTextEndLimits()

// Moves the rejected copy to the "error" directory of the project so that it can be inspected
def moveToErrorDirectory = { File xmltxmFile, File xmltxmFileCopy ->
	File error = new File(corpus.getProject().getProjectDirectory(), "error/"+xmltxmFile.getName())
	error.getParentFile().mkdirs()
	println "	moving created file to $error"
	error.delete()
	xmltxmFileCopy.renameTo(error)
}

for (int i = 0 ; i < texts.size() ; i++) {

	println "Processing annotations of "+texts[i]+"..."

	def text_id = texts[i]
	def text_start = texts_startlimits[i]
	def text_end = texts_endlimits[i]

	File xmltxmFile = new File(corpus.getProject().getProjectDirectory(), "txm/"+corpus.getID()+"/"+text_id+".xml")
	File xmltxmFileCopy = new File(corpus.getProject().getProjectDirectory(), text_id+"_copy.xml")

	if (!xmltxmFile.exists()) {
		println "Warning: no text file found: "+xmltxmFile
		continue
	}

	// one pass over the file per URS unit type
	for (String uType : analecCorpus.getStructure().getTypes(Unite.class)) {
		// units with the requested "type" property, inside the limits of the current text and not already written
		def corpus_units = analecCorpus.getUnites(uType).findAll() {
			unit_type.equals(it.getProp("type")) && text_start <= it.getDeb() && it.getFin() < text_end && !("true".equals(it.getProp("written")))
		}

		if (corpus_units.size() == 0) continue;

		try {
			println " processing word Units ${text_id} and its '$uType' units ("+corpus_units.size()+")"
			WordUnitsInserter inserter = new WordUnitsInserter(corpus, xmltxmFile, corpus_units, unit_properties, unit_word);
			if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				// throws if the file cannot be replaced, so the units are never flagged when the file was not updated
				java.nio.file.Files.move(xmltxmFileCopy.toPath(), xmltxmFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)

				for (Unite unit : corpus_units) {
					unit.getProps()["written"] = "true"
				}
				URSCorpora.saveCorpus(corpus);
				println "Done, "+corpus_units.size()+ " units written"

			} else {
				println "Error while processing the XML-TXM $xmltxmFile file"
				moveToErrorDirectory(xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing the XML-TXM $xmltxmFile file: "+e
			e.printStackTrace();
			moveToErrorDirectory(xmltxmFile, xmltxmFileCopy)
		}
	}
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/AllMesuresMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.prototypes.exploit
7

  
8
import org.kohsuke.args4j.*
9

  
10
import groovy.transform.Field
11

  
12
import org.txm.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.apache.commons.lang.StringUtils;
17

  
18
// BEGINNING OF PARAMETERS
19

  
20
@Field @Option(name="tsvFile",usage="", widget="FileSave", required=true, def="result.tsv")
File tsvFile

@Field @Option(name="default_schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String default_schema_ursql

@Field @Option(name="default_minimum_schema_size", usage="", widget="Integer", required=true, def="3")
int default_minimum_schema_size

@Field @Option(name="schema_property_display_name",usage="", widget="String", required=false, def="REF")
String schema_property_display_name

@Field @Option(name="default_unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String default_unit_ursql

@Field @Option(name="default_word_property", usage="", widget="String", required=false, def="word")
String default_word_property
@Field @Option(name="default_pos_property", usage="", widget="String", required=false, def="CATEGORIE")
String default_pos_property

if (!ParametersDialog.open(this)) return;
// END OF PARAMETERS

println "Corpora selections: "+corpusViewSelections

table = [] // one line per processed corpus
mesures = [] // measure names, filled by execMesure

for (def corpus : corpusViewSelections) {
	// only the main corpora of the selection are measured
	if (!(corpus instanceof MainCorpus)) continue;

	// a table line starts with the corpus name, followed by one cell per measure
	def line = [corpus.getID()]
	table << line

	println "*** Computing mesures for $corpus" // each macro return a "result" and a "data"

	// the measures to run, in column order: [macro, macro parameters, key of the returned value to store]
	def steps = [
		[UnitsReferentialDensityMacro, [
			"unit_ursql":default_unit_ursql,
		], "result"],
		[SchemaLengthsMacro, [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
		], "result"],
		[NumberOfSchemaMacro, [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
		], "result"],
		[UnitsStabilityScoreMacro, [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"schema_property_display_name":schema_property_display_name,
			"unit_ursql":default_unit_ursql+"@CATEGORIE=GN Défini|GN Démonstratif|Nom Propre",
			"word_property":default_word_property,
		], "result"],
		[UnitsInterDistanceMacro, [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
		], "result"],
		[NatureOfTheFirstUnitMacro, [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"unit_ursql":default_unit_ursql,
			"word_property":default_pos_property,
		], "data"],
		[GrammaticalCategoryMacro, [
			"schema_ursql":default_schema_ursql,
			"minimum_schema_size":default_minimum_schema_size,
			"schema_property_display_name":schema_property_display_name,
			"unit_ursql":default_unit_ursql,
			"word_property":default_pos_property,
		], "data"],
	]

	for (def step : steps) {
		params = step[1]
		returnedValue = execMesure(step[0], line, corpus, params)
		line << returnedValue[step[2]]
	}
}

// WRITE RESULTS IN THE TSV FILE
tsvFile.withWriter("UTF-8") { out ->
	// header: an empty cell above the corpus names, then the measure names
	out.println "\t"+mesures.join("\t")
	for (def row : table) {
		out.println row.join("\t")
	}
}

println "Done. Results are saved in ${tsvFile.getAbsolutePath()} file."
123

  
124
// UTILITY FUNCTIONS
125
/**
 * Runs a measure macro on a corpus and registers the measure name as a column name.
 *
 * @param mesure the class of the macro to run; the measure name is its simple name without the "Macro" suffix
 * @param line the current table line (not used here)
 * @param corpus the corpus set as "corpusViewSelection" of the macro
 * @param params the macro parameters, set as "args" of the macro
 * @return the value returned by the macro
 * @throws Exception if the macro returns null
 */
def execMesure(def mesure, def line, def corpus, def params) {
	String simpleName = mesure.getSimpleName()
	int suffix = simpleName.indexOf("Macro")
	// keep the whole name if the class name has no "Macro" suffix
	def m = suffix >= 0 ? simpleName.substring(0, suffix) : simpleName
	// the measures are run again for each corpus: register each column name only once
	if (!mesures.contains(m)) mesures << m
	println "***** ${mesures.indexOf(m) + 1}- $m with parameters: $params"
	def r = gse.run(mesure, ["args":params, "corpusViewSelection":corpus, "monitor":monitor])
	if (r == null) throw new Exception("Null result");
	return r;
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/NatureOfTheFirstUnitMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.prototypes.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.searchengine.cqp.CQPSearchEngine
17
import org.apache.commons.lang.StringUtils;
18

  
19
if (!(corpusViewSelection instanceof CQPCorpus)) {
	println "Corpora selection is not a Corpus"
	return;
}

// BEGINNING OF PARAMETERS
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
String schema_ursql

@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
int minimum_schema_size

@Field @Option(name="unit_ursql", usage="TYPE@PROP=REGEX", widget="String", required=false, def="MENTION")
String unit_ursql

@Field @Option(name="word_property", usage="", widget="String", required=false, def="CATEGORIE")
String word_property

@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
debug

if (!ParametersDialog.open(this)) return;
// the debug level is selected by name and used as a number
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3


CQPCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
	return;
}

if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
	return;
}

def CQI = CQPSearchEngine.getCqiClient()

// null when word_property is not a property of the CQP corpus: it is then read from the URS unit
def prop = corpus.getProperty(word_property)

def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);

// value of the first unit of a schema -> number of schemas
def freqs = [:]

for (def schema : schemas) {

	def allUnites = schema.getUnitesSousjacentesNonTriees()

	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)

	if (units.size() == 0) continue;

	// NOTE(review): the units come from getUnitesSousjacentesNonTriees(); this assumes
	// filterElements returns them in text order - TODO confirm
	def unit = units[0]

	String forme =  null;
	if (prop == null) { // word_property is the analec unit property to use
		forme = unit.getProp(word_property)
	} else {
		// values of the CQP property for all the words of the unit, separated by a space
		int[] pos = null;
		if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
		else pos = unit.getDeb()..unit.getFin()

		forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
	}

	if (!freqs.containsKey(forme)) freqs[forme] = 0;

	freqs[forme] = freqs[forme] + 1;
}

println "Index des natures de premier maillon :"
int max = 0;
def result = "";
// values are listed by decreasing frequency; "result" keeps the most frequent one
for (def forme : freqs.keySet().sort() {it -> -freqs[it]}) {
	println "$forme\t"+freqs[forme]
	if (max < freqs[forme]) {
		max = freqs[forme]
		result = "$forme: "+freqs[forme]
	}
}

["result": result, "data": freqs]
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/NumberOfSchemaMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.prototypes.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import visuAnalec.elements.*
13
import org.txm.searchengine.cqp.corpus.*
14
import org.txm.macro.urs.AnalecUtils
15

  
16
if (!(corpusViewSelection instanceof CQPCorpus)) {
17
	println "Corpora selection is not a Corpus"
18
	return;
19
}
20

  
21
// BEGINNING OF PARAMETERS
22
@Field @Option(name="schema_ursql", usage="TYPE@PROP=REGEX", widget="String", required=true, def="CHAINE")
23
String schema_ursql
24

  
25
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
26
int minimum_schema_size
27

  
28
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
29
debug
30

  
31
if (!ParametersDialog.open(this)) return;
32
// Convert the debug level picked in the parameters dialog (a String label) into the integer level 0..3 passed to AnalecUtils below
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
33

  
34

  
35
CQPCorpus corpus = corpusViewSelection
36
def analecCorpus = URSCorpora.getCorpus(corpus)
37

  
38
// check Schema parameters
39
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
40
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
41
	return;
42
}
43

  
44
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
45

  
46
int nSchemas = schemas.size();
47

  
48
println "Nombre de chaînes de référence d'un texte : $nSchemas"
49

  
50
["result":nSchemas, "data":schemas]
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/exploit/GrammaticalCategoryMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macro.urs.prototypes.exploit
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.*
11
import org.txm.macro.urs.AnalecUtils
12
import visuAnalec.elements.*
13
import org.txm.rcp.swt.widget.parameters.*
14
import org.txm.annotation.urs.*
15
import org.txm.searchengine.cqp.*
16
import org.txm.searchengine.cqp.corpus.*
17
import org.apache.commons.lang.StringUtils;
18

  
19
// BEGINNING OF PARAMETERS
20

  
21
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=true, def="CHAINE")
22
String schema_ursql
23

  
24
@Field @Option(name="minimum_schema_size", usage="minimal schema size", widget="Integer", required=true, def="3")
25
int minimum_schema_size
26

  
27
@Field @Option(name="schema_display_property_name",usage="", widget="String", required=false, def="REF")
28
String schema_display_property_name
29

  
30
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
31
String unit_ursql
32

  
33
@Field @Option(name="property", usage="", widget="String", required=false, def="CATEGORIE")
34
String property
35

  
36
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
37
debug
38

  
39
if (!(corpusViewSelection instanceof CQPCorpus)) {
40
	println "Corpora selection is not a Corpus"
41
	return;
42
}
43

  
44
// Open the parameters input dialog box
45
if (!ParametersDialog.open(this)) return;
46
// Convert the debug level picked in the parameters dialog (a String label) into the integer level 0..3 passed to AnalecUtils below
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
47

  
48
// END OF PARAMETERS
49

  
50
// Declared as CQPCorpus so the type agrees with the instanceof CQPCorpus guard:
// a MainCorpus-typed variable would throw a cast error on any other accepted CQPCorpus selection.
CQPCorpus corpus = corpusViewSelection
51
def analecCorpus = URSCorpora.getCorpus(corpus)
52

  
53
if (!AnalecUtils.isPropertyDefined(Schema.class, analecCorpus, schema_ursql)) {
54
	println "** The $schema_ursql schema URSQL cannot be computed in the corpus."
55
	return;
56
}
57

  
58
if (!AnalecUtils.isPropertyDefined(Unite.class, analecCorpus, unit_ursql)) {
59
	println "** $unit_ursql unit URSQL cannot be computed in the corpus."
60
	return;
61
}
62

  
63
def CQI = CQPSearchEngine.getCqiClient()
64

  
65
def prop = corpus.getProperty(property)
66
if (prop == null) {
67
	analecCorpus.getStructure()
68
}
69
def schemas = AnalecUtils.selectSchemasInCorpus(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, 999999);
70
def allFreqs = [:]
71
def n = 0
72
for (def schema : schemas) {
73
	n++
74
	
75
	def freqs = [:]
76
		
77
	def allUnites = schema.getUnitesSousjacentesNonTriees()
78

  
79
	def units = AnalecUtils.filterElements(debug, allUnites, unit_ursql)
80
	
81
	for (def unit : units) { // no need to sort units
82

  
83
		String forme =  null;
84
		if (prop == null) { // property is the analec unit property to use
85
			forme = unit.getProp(property)
86
		} else {
87
			int[] pos = null;
88
			if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
89
			else pos = unit.getDeb()..unit.getFin()
90
				
91
			forme = StringUtils.join(CQI.cpos2Str(prop.getQualifiedName(), pos), " ") // ids is enough
92
		}
93
		
94
		if (!freqs.containsKey(forme)) freqs[forme] = 0;
95
		freqs[forme] = freqs[forme] + 1;
96
		
97
		if (!allFreqs.containsKey(forme)) allFreqs[forme] = 0;
98
		allFreqs[forme] = allFreqs[forme] + 1;
99
	}
100
	
101
	if (schema_display_property_name != null) {
102
		println "Index des natures de $unit_ursql de '"+schema.getProp(schema_display_property_name)+"' : "
103
	} else {
104
		println "Index des natures de $schema_ursql - $n : "
105
	}
106
	
107
	int max = 0;
108
	def result = "";
109
	for (def forme : freqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
110
		println forme.key+"\t"+forme.value
111
	}
112
}
113

  
114
int max = 0;
115
def result = "";
116

  
117
println "Index des natures de $schema_ursql : "
118
for (def forme : allFreqs.sort() { a, b -> -a.value <=> -b.value ?: a.key <=> b.key }) {
119
	println forme.key+"\t"+forme.value
120
	if (max < forme.value) {
121
		max = forme.value
122
		// forme is a map entry here: interpolate its key only, otherwise the entry renders as "key=value" and the count is printed twice
		result = "${forme.key}: "+forme.value
123
	}
124
}
125

  
126
return ["result":result, "data":allFreqs]
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/CompUnitPropertiesMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.kohsuke.args4j.*
10
import groovy.transform.Field
11
import org.txm.Toolbox;
12
import org.txm.rcp.swt.widget.parameters.*
13
import org.txm.annotation.urs.*
14
import org.txm.searchengine.cqp.AbstractCqiClient;
15
import org.txm.searchengine.cqp.corpus.*
16
import org.txm.searchengine.cqp.CQPSearchEngine
17
import visuAnalec.donnees.Structure;
18
import visuAnalec.elements.Unite;
19

  
20
if (!(corpusViewSelection instanceof MainCorpus)) {
21
	println "Corpora selection is not a Corpus"
22
	return;
23
}
24

  
25
// BEGINNING OF PARAMETERS
26
@Field @Option(name="unit_type",usage="", widget="String", required=true, def="MENTION")
27
String unit_type
28

  
29
@Field @Option(name="print_diff",usage="", widget="Boolean", required=true, def="true")
30
boolean print_diff
31

  
32
@Field @Option(name="unit_property_name1", usage="", widget="String", required=false, def="CATEGORIE")
33
String unit_property_name1
34

  
35
@Field @Option(name="unit_property_name2", usage="", widget="String", required=false, def="CATEGORIE_ORIG")
36
String unit_property_name2
37

  
38
if (!ParametersDialog.open(this)) return;
39

  
40
int n = 1;
41
int nDiff = 0;
42
MainCorpus corpus = corpusViewSelection
43
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
44
def word = corpus.getWordProperty()
45
def analecCorpus = URSCorpora.getCorpus(corpus);
46

  
47
def units = analecCorpus.getUnites(unit_type)
48
// Order the units by start position; units starting at the same position are ordered by end position
units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }
49
for (Unite unit : units) {
50
	int[] pos = null
51
	if (unit.getDeb() == unit.getFin()) pos = [unit.getDeb()]
52
	else pos = (unit.getDeb()..unit.getFin())
53
	def form = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos), " ")
54
	def props = unit.getProps()
55
	def v1 = props.get(unit_property_name1);
56
	def v2 = props.get(unit_property_name2);
57
	
58
	if (v1 != v2) {
59
		if (print_diff) println "$n - ${unit.getDeb()} -> ${unit.getFin()} - $props : $form"
60
		nDiff++
61
	}
62
	n++
63
}
64

  
65
if (nDiff == 0) println "$unit_property_name1 and $unit_property_name2 have the same values."
66
else println "$unit_property_name1 and $unit_property_name2 have $nDiff/${units.size()} different values." // n is a 1-based label already advanced past the last unit, so the real total is units.size()
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/RelationsListMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.apache.commons.lang.StringUtils;
9
import org.apache.tools.ant.types.resources.selectors.InstanceOf;
10
import org.kohsuke.args4j.*
11

  
12
import groovy.transform.Field
13

  
14
import org.txm.Toolbox;
15
import org.txm.rcp.swt.widget.parameters.*
16
import org.txm.annotation.urs.*
17
import org.txm.searchengine.cqp.AbstractCqiClient;
18
import org.txm.searchengine.cqp.corpus.*
19
import org.txm.searchengine.cqp.CQPSearchEngine
20

  
21
import visuAnalec.donnees.Structure;
22
import visuAnalec.elements.Relation
23
import visuAnalec.elements.Unite;
24

  
25
if (!(corpusViewSelection instanceof MainCorpus)) {
26
	println "Corpora selection is not a Corpus"
27
	return;
28
}
29

  
30
// BEGINNING OF PARAMETERS
31
@Field @Option(name="relation_type",usage="", widget="String", required=true, def="ANAPHORE")
32
String relation_type
33

  
34
if (!ParametersDialog.open(this)) return;
35

  
36
MainCorpus corpus = corpusViewSelection
37
AbstractCqiClient CQI = CQPSearchEngine.getCqiClient();
38
def word = corpus.getWordProperty()
39
visuAnalec.donnees.Corpus analecCorpus = URSCorpora.getCorpus(corpus);
40

  
41
int n = 1;
42
def relations = null
43
if (relation_type.length() > 0) {
44
	relations = []
45
	// Only collect the relations of the type requested through the relation_type parameter;
	// looping over every declared type left the parameter without any effect.
	for (String type : analecCorpus.getStructure().getTypes(Relation.class).findAll({ it == relation_type }))
46
		relations.addAll(analecCorpus.getRelations(type))
47
} else {
48
	relations = analecCorpus.getToutesRelations()
49
}
50

  
51
for (Relation relation : relations) {
52
	def unit1 = relation.getElt1();
53
	def unit2 = relation.getElt2();
54
	def props = relation.getProps()
55
	if (unit1 instanceof Unite && unit2 instanceof Unite) {
56
		int[] pos1 = null
57
		if (unit1.getDeb() == unit1.getFin()) pos1 = [unit1.getDeb()]
58
		else pos1 = (unit1.getDeb()..unit1.getFin())
59
		def form1 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos1), " ")
60
		
61
		int[] pos2 = null
62
		if (unit2.getDeb() == unit2.getFin()) pos2 = [unit2.getDeb()]
63
		else pos2 = (unit2.getDeb()..unit2.getFin())
64
		def form2 = StringUtils.join(CQI.cpos2Str(word.getQualifiedName(), pos2), " ")
65
		
66
		println "$n - $props : $form1 -> $form2"
67
	} else {
68
		println "$n - $props"
69
	}
70
	n++
71
}
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/UnitTypesInSchemaMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
// BEGINNING OF PARAMETERS
20
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
21
String schema_type
22

  
23
if (!ParametersDialog.open(this)) return;
24

  
25
MainCorpus corpus = corpusViewSelection
26
def analecCorpus = URSCorpora.getCorpus(corpus);
27
def map = new HashMap()
28
def unitesInSchema = []
29
def n = 0
30
for (def schema : analecCorpus.getSchemas(schema_type)) {
31
	def unites = schema.getUnitesSousjacentes()
32
	unitesInSchema.addAll(unites)
33
	n += unites.size()
34
}
35

  
36
// Count how many times each unit was collected from the schemas; a count above 1 is reported as an error below
def counts = unitesInSchema.countBy() { it };
37
for (def c : counts.keySet()) {
38
	if (counts[c] > 1) println "ERROR UNIT IN MULTIPLE SCHEMA["+c.getDeb()+", "+c.getFin()+"]="+c.getProps()+" in "+c.getSchemas().collect() {it.getProps()}
39
}
40

  
41
def set = new HashSet()
42
set.addAll(unitesInSchema)
43
for (def s : set.collect { it.getType() }) {
44
	if (!map.containsKey(s)) map[s] = 0;
45
	map[s] = map[s] +1
46
}
47

  
48
// The one-argument sort closure receives a Map.Entry, so order on its value (the count);
// looking the entry up in the map always returned null and left the map unsorted.
println "Unites types: "+map.sort() { it.value }
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/UnitTypesNotInSchemaMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import org.kohsuke.args4j.*
9
import groovy.transform.Field
10
import org.txm.rcp.swt.widget.parameters.*
11
import org.txm.annotation.urs.*
12
import org.txm.searchengine.cqp.corpus.*
13

  
14
if (!(corpusViewSelection instanceof MainCorpus)) {
15
	println "Corpora selection is not a Corpus"
16
	return;
17
}
18

  
19
// BEGINNING OF PARAMETERS
20
@Field @Option(name="schema_type",usage="", widget="String", required=true, def="CHAINE")
21
String schema_type
22
if (!ParametersDialog.open(this)) return;
23

  
24
MainCorpus corpus = corpusViewSelection
25
def analecCorpus = URSCorpora.getCorpus(corpus);
26

  
27
def unitesInSchema = new HashSet()
28
for (def schema : analecCorpus.getSchemas(schema_type)) {
29
	unitesInSchema.addAll(schema.getUnitesSousjacentes())
30
}
31
println "unites: "+analecCorpus.getToutesUnites().size()
32
println "unites in schema: "+unitesInSchema.size()
33

  
34
def set = new HashMap()
35
for (def u : analecCorpus.getToutesUnites()) {
36
	if (unitesInSchema.contains(u)) continue;
37
	
38
	if (!set.containsKey(u.getType())) set[u.getType()] = 0;
39
	set[u.getType()] = set[u.getType()] +1
40
}
41

  
42
// The one-argument sort closure receives a Map.Entry, so order on its value (the count);
// looking the entry up in the map always returned null and left the map unsorted.
println "unites not in schema: "+set.sort() { it.value }
TXM/trunk/org.txm.analec.rcp/src/org/txm/macro/urs/prototypes/misc/UnitsProgressionMacro.groovy (revision 3288)
1
// Copyright © 2016 ENS de Lyon, CNRS, University of Franche-Comté
2
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
3
// @author mdecorde
4
// @author sheiden
5
// STANDARD DECLARATIONS
6
package org.txm.macroprototypes.urs.misc
7

  
8
import java.util.ArrayList;
9
import java.util.List;
10

  
11
import org.apache.commons.lang.StringUtils
12
import org.jfree.chart.JFreeChart
13
import org.jfree.chart.plot.XYPlot
14
import org.kohsuke.args4j.*
15

  
16
import groovy.transform.Field
17

  
18
import org.txm.Toolbox
19
import org.txm.progression.core.chartsengine.jfreechart.themes.highcharts.renderers.ProgressionItemSelectionRenderer
20
import org.txm.progression.core.functions.Progression
21
import org.txm.rcp.swt.widget.parameters.*
22
import org.txm.annotation.urs.*
23
import org.txm.chartsengine.rcp.editors.ChartEditor
24
import org.txm.macro.urs.AnalecUtils
25
import org.txm.searchengine.cqp.AbstractCqiClient
26
import org.txm.searchengine.cqp.corpus.*
27
import org.txm.searchengine.cqp.corpus.query.Match;
28
import org.txm.searchengine.cqp.corpus.query.CQLQuery
29
import org.txm.rcp.Application
30
import org.txm.rcp.IImageKeys
31

  
32
import visuAnalec.donnees.Structure
33
import visuAnalec.elements.*
34

  
35
def scriptName = this.class.getSimpleName()
36
def parent
37
def selection = []
38
if (!(corpusViewSelection instanceof CQPCorpus)) {
39
	println "** $scriptName please select a Corpus to run the macro"
	return // stop here: the rest of the script calls corpus methods on the selection
40
}
41
selection << corpusViewSelection
42
parent = corpusViewSelection
43

  
44
// BEGINNING OF PARAMETERS
45
@Field @Option(name="schema_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="CHAINE")
46
		String schema_ursql
47
@Field @Option(name="minimum_schema_size", usage="Minimum size needed to consider a schema", widget="Integer", required=true, def="3")
48
		int minimum_schema_size
49
@Field @Option(name="maximum_schema_size", usage="Maximum size needed to consider a schema", widget="Integer", required=true, def="9999999")
50
		int maximum_schema_size
51
@Field @Option(name="unit_ursql", usage="TYPE@PROP=VALUE", widget="String", required=false, def="MENTION")
52
		String unit_ursql
53
@Field @Option(name="limit_distance_in_schema", usage="Unit distance in schema (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
54
		int limit_distance_in_schema
55
@Field @Option(name="limit_cql", usage="CQL to build structure limits", widget="Query", required=true, def="<div> [] expand to div")
56
		limit_cql
57
@Field @Option(name="strict_inclusion", usage="Units must be strictly included into corpus matches", widget="Boolean", required=true, def="true")
58
		boolean strict_inclusion
59
@Field @Option(name="limit_distance", usage="Unit distance to structure limit (0 = no selection, 1 = first after limit, -1 = last before limit, etc.)", widget="Integer", required=true, def="0")
60
		int limit_distance
61
@Field @Option(name="unit_property_display", usage="Unit property to count", widget="String", required=true, def="CATEGORIE")
62
		String unit_property_display
63
@Field @Option(name="struct_name", usage="Structure to display", widget="String", required=true, def="div")
64
		String struct_name
65
@Field @Option(name="struct_prop", usage="Structure property to display", widget="String", required=true, def="n")
66
		String struct_prop
67
@Field @Option(name="line_width", usage="line width", widget="Integer", required=true, def="1")
68
		int line_width = 2
69
@Field @Option(name="bande_width", usage="bande width", widget="Float", required=true, def="1.0f")
70
		float bande_width = 1.0f
71
@Field @Option(name="debug", usage="Show internal variable content", widget="StringArray", metaVar="OFF	ON	ALL	REALLY ALL", required=true, def="OFF")
72
		debug
73
if (!ParametersDialog.open(this)) return
74
// Convert the debug level picked in the parameters dialog (a String label) into the integer level 0..3 passed to AnalecUtils below
if (debug == "OFF") debug = 0; else if (debug == "ON") debug = 1; else if (debug == "ALL") debug = 2 else if (debug == "REALLY ALL") debug = 3
75

  
76

  
77
	def CQI = CQPSearchEngine.getCqiClient()
78

  
79
def queries = []
80
def queryResults = []
81
def informations = []
82
for (def corpus : selection) {
83
	
84
	mainCorpus = corpus.getMainCorpus()
85

  
86
	def word = mainCorpus.getWordProperty()
87
	def analecCorpus = URSCorpora.getCorpus(mainCorpus.getName())
88

  
89
	def selectedUnits = AnalecUtils.selectUnitsInSchema(debug, analecCorpus, corpus, schema_ursql, minimum_schema_size, maximum_schema_size, 
90
	unit_ursql, limit_distance_in_schema, limit_cql, strict_inclusion, limit_distance);
91

  
92
	def query = ""
93
	if (limit_cql != null && !limit_cql.getQueryString().equals("\"\"")) query += limit_cql
94
	if (schema_ursql != null && schema_ursql.length() > 0) { if (query.length() > 0) query += " & "; query += ""+schema_ursql+ " >"}
95
	if (unit_ursql != null && unit_ursql.length() > 0) query += " "+unit_ursql
96
	query = new CQLQuery(query)
97
	int[] starts = new int[selectedUnits.size()];
98
	int[] ends = new int[selectedUnits.size()];
99
	def unitsinformations = []
100
	int n = 0;
101
	for (Unite unite : selectedUnits) {
102
		starts[n] = unite.getDeb()
103
		ends[n] = unite.getFin()
104
		unitsinformations << AnalecUtils.toString(CQI, word, unite);
105
		n++
106
	}
107
	def queryResult = new FakeQueryResult(corpus.getID(), corpus, query, starts, ends, null)
108
	queries << query
109
	queryResults << queryResult
110
	informations << unitsinformations
111

  
112
	if (unit_property_display != null && unit_property_display.length() > 0) {
113
		def propvalues = [:]
114
		for (def unit : selectedUnits) {
115
			def v = unit.getProp(unit_property_display)
116
			if (v == null) v = "<null>"
117
			else if (v.length() == 0) v = "<empty>"
118
			
... Ce différentiel a été tronqué car il excède la taille maximale pouvant être affichée.

Formats disponibles : Unified diff