Revision 2087

tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/cqp/URSUnits2CQPStructMacro.groovy (revision 2087)
1
// STANDARD DECLARATIONS
2
package org.txm.macroprototypes.urs.cqp
3

  
4
import org.kohsuke.args4j.*
5
import groovy.transform.Field
6
import org.txm.annotation.urs.*
7
import org.txm.importer.ValidateXml
8
import org.txm.rcp.swt.widget.parameters.*
9
import org.txm.searchengine.cqp.corpus.*
10
import visuAnalec.elements.*
11

  
12
// BEGINNING OF PARAMETERS
13

  
14
// Open the parameters input dialog box
15
//if (!ParametersDialog.open(this)) return;
16

  
17
// This macro writes the URS units of the selected corpus into its XML-TXM files:
// first the units flagged milestone="yes" (as empty elements), then all other units
// (as elements wrapping words). It only works on a MainCorpus selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Selection must be a Corpus"
	return
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

def texts = corpus.getCorpusTextIdsList();
def texts_startlimits = corpus.getTextStartLimits()
def texts_endlimits = corpus.getTextEndLimits()

File projectDirectory = corpus.getProject().getProjectDirectory()

// Replaces the XML-TXM file with its validated copy, then flags the units as written
// and saves the URS corpus. Files.move(REPLACE_EXISTING) throws when the replacement
// fails, so the units are never flagged "written" if the file was not actually replaced
// (the previous delete()+renameTo() sequence ignored both results).
def commitCopy = { File xmlFile, File copyFile, writtenUnits ->
	java.nio.file.Files.move(copyFile.toPath(), xmlFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)
	for (Unite unit : writtenUnits) {
		unit.getProps()["written"] = "true"
	}
	URSCorpora.saveCorpus(corpus);
}

// Best-effort move of the rejected copy to the "error" directory of the project.
def moveCopyToErrorDirectory = { File xmlFile, File copyFile ->
	File error = new File(projectDirectory, "error/"+xmlFile.getName())
	error.getParentFile().mkdirs()
	println "	moving created file to $error"
	error.delete()
	copyFile.renameTo(error)
}

for (int i = 0 ; i < texts.size() ; i++) {

	println "Processing annotations of "+texts[i]+"..."

	def text_id = texts[i]
	def text_start = texts_startlimits[i]
	def text_end = texts_endlimits[i]

	File xmltxmFile = new File(projectDirectory, "txm/"+corpus.getID()+"/"+text_id+".xml")
	File xmltxmFileCopy = new File(projectDirectory, text_id+"_copy.xml")

	if (!xmltxmFile.exists()) {
		println "Warning: no text file found: "+xmltxmFile
		continue
	}

	// Selection shared by both passes: the unit has a non-blank "type", lies inside
	// the limits of the current text and has not been written yet.
	def isPending = { Unite u ->
		def type = u.getProp("type")
		return type != null && type.trim().length() > 0 &&
				text_start <= u.getDeb() && u.getFin() < text_end &&
				!("true".equals(u.getProp("written")))
	}

	// WRITE MILESTONES UNITS

	println "-> MILESTONES UNITS"

	def units = []
	for (String unit_type : analecCorpus.getStructure().getTypes(Unite.class)) {
		ArrayList<Unite> all_units = analecCorpus.getUnites(unit_type)
		units.addAll(all_units.findAll() { isPending(it) && "yes".equals(it.getProp("milestone")) })
	}

	if (units.size() == 0) {
		println "No milestones to write"
	} else {
		try {
			println "processing milestones Units ${text_id} and its units "+units.size()
			MileStoneInserter inserter = new MileStoneInserter(corpus, xmltxmFile, units);
			if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				commitCopy(xmltxmFile, xmltxmFileCopy, units)
				println "Done, "+units.size()+ " milestones written"
			} else {
				println "Error while processing milestones $xmltxmFile file"
				moveCopyToErrorDirectory(xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing milestones $xmltxmFile file: "+e
			moveCopyToErrorDirectory(xmltxmFile, xmltxmFileCopy)
		}
	}

	// WRITE NON MILESTONES UNITS
	println "-> OTHER UNITS"

	for (String unit_type : analecCorpus.getStructure().getTypes(Unite.class)) {

		ArrayList<Unite> all_units = analecCorpus.getUnites(unit_type)
		// Everything that is NOT flagged milestone="yes". The previous test (!"no".equals(...))
		// was inverted: it rejected units explicitly flagged milestone="no" and accepted
		// milestone units whose insertion had failed above.
		def corpus_units = all_units.findAll() { isPending(it) && !"yes".equals(it.getProp("milestone")) }

		if (corpus_units.size() == 0) continue;

		println "processing Units ${text_id} and its $unit_type units "+corpus_units.size()
		try {
			// built inside the try so that a failure for one unit type does not abort the whole macro
			UnitsInserter inserter2 = new UnitsInserter(corpus, xmltxmFile, corpus_units, unit_type);
			if (inserter2.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				commitCopy(xmltxmFile, xmltxmFileCopy, corpus_units)
				println "Done, "+corpus_units.size()+ " units written"
			} else {
				println "Error while processing units $xmltxmFile file"
				moveCopyToErrorDirectory(xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing units $xmltxmFile file: "+e
			moveCopyToErrorDirectory(xmltxmFile, xmltxmFileCopy)
		}
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/cqp/WordUnitsInserter.groovy (revision 2087)
1
package org.txm.macroprototypes.urs.cqp
2

  
3
import java.io.IOException
4
import java.util.Date
5
import java.util.LinkedHashMap
6

  
7
import javax.xml.stream.XMLStreamException
8

  
9
import org.txm.Toolbox
10
import org.txm.importer.StaxIdentityParser
11
import org.txm.macro.urs.AnalecUtils
12
import org.txm.scripts.importer.GetAttributeValue
13
import org.txm.searchengine.cqp.CQPSearchEngine
14
import org.txm.searchengine.cqp.corpus.MainCorpus
15
import visuAnalec.elements.Unite
16

  
17
/**
 * StAX rewriter that injects the properties of URS units into the &lt;w&gt; elements
 * they cover, as "txm:ana" children.
 *
 * For each word covered by at least one unit, every unit property except "type",
 * "written" and "milestone" yields a value. When the word already has an "ana" of the
 * same type, the existing text is appended to the unit value and the "resp" attribute is
 * set to "#txm"; otherwise a new "txm:ana" is written just before the word is closed.
 * Elements not handled here are delegated to the parent parser.
 */
public class WordUnitsInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	/** word id -> list of the units covering that word */
	def id2Units = [:]

	/** values still to be written for the current word, keyed by annotation type (without "#") */
	LinkedHashMap<String, String> anaValues = new LinkedHashMap<String, String>();

	/**
	 * @param corpus the corpus used to resolve corpus positions to word ids ("id" property)
	 * @param inputFile the XML file to rewrite
	 * @param units the units to inject; the list is sorted in place by start then end position
	 */
	public WordUnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units) {
		super(inputFile)
		this.inputFile = inputFile

		this.units = units
		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

		def idPropertyName = corpus.getProperty("id").getQualifiedName()

		for (int i = 0 ; i < units.size() ; i++) {
			Unite u = units[i]
			int[] positions = u.getDeb()..u.getFin()

			ids = CQPSearchEngine.getCqiClient().cpos2Str(idPropertyName, positions)

			for (String id : ids) {
				// skip unresolved positions: a null key would later match any <w> without id
				if (id == null) continue

				if (!id2Units.containsKey(id)) {
					id2Units[id] = []
				}
				id2Units[id] << u
			}
		}
	}

	boolean inAna = false
	String ana_type, ana_resp, ana_value
	boolean inW = false
	String word_id = null

	/**
	 * On &lt;w&gt;: collects the unit values for this word, then writes the tag.
	 * On "ana" inside a word: starts buffering; the element is written by processEndElement().
	 */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		if ("w".equals(localname)) {

			inW = true
			word_id = this.getParserAttributeValue("id")

			if (id2Units.containsKey(word_id)) {
				for (Unite u : id2Units[word_id]) {
					def props = u.getProps();
					for (String p : props.keySet()) {
						// bookkeeping properties are not annotations
						if ("type".equals(p)) continue;
						if ("written".equals(p)) continue;
						if ("milestone".equals(p)) continue;

						if (!anaValues.containsKey(p)) {
							anaValues[p] = ""
						}
						// values of several units covering the same word are space-separated
						anaValues[p] = (anaValues[p]+" "+props.get(p)).trim()
					}
				}
				if (anaValues.size() > 0) println anaValues
			}

			super.processStartElement(); // write the tag

		} else if ("ana".equals(localname) && inW) {

			inAna = true
			// the first character of the type attribute is dropped ("#" is written back on output)
			ana_type = this.getParserAttributeValue("type").substring(1)
			ana_resp = this.getParserAttributeValue("resp")
			ana_value = ""

		} else {
			super.processStartElement()
		}
	}

	/** Buffers (trimmed) text while inside an "ana" element, otherwise delegates to the parent. */
	@Override
	public void processCharacters() throws XMLStreamException {
		if (inAna) {
			ana_value += parser.getText().trim()
		} else {
			super.processCharacters()
		}
	}

	/**
	 * On "ana" inside a word: writes the buffered element, merged with the pending unit
	 * value of the same type if any. On &lt;w&gt;: writes the unit values that did not match
	 * an existing "ana", then closes the word.
	 */
	@Override
	protected void processEndElement() throws XMLStreamException {

		if ("w".equals(localname)) {

			// write the last values
			for (String type : anaValues.keySet()) {
				writer.writeStartElement("txm:ana")
				writer.writeAttribute("type", "#" + type)
				writer.writeAttribute("resp", "#txm") // change
				writer.writeCharacters(anaValues[type])
				writer.writeEndElement()
			}

			anaValues.clear()
			super.processEndElement() // finally write word then close annotations
			inW = false

		} else if ("ana".equals(localname) && inW) {

			if (!anaValues.containsKey(ana_type)) {
				anaValues[ana_type] = ana_value.trim()
			} else {
				ana_resp = "#txm" // set the resp to txm since anaValues update the ana value
				anaValues[ana_type] = (anaValues[ana_type]+" "+ana_value.trim()).trim()
			}

			String value = anaValues[ana_type]

			writer.writeStartElement("txm:ana")
			writer.writeAttribute("type", "#" + ana_type)
			writer.writeAttribute("resp", ana_resp) // change
			writer.writeCharacters(value)
			writer.writeEndElement()

			// consumed: must not be written again when the word is closed
			anaValues.remove(ana_type)

			inAna = false
			ana_type = null
			ana_resp = null
			ana_value = null

		} else {
			super.processEndElement()
		}
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/cqp/MileStoneInserter.groovy (revision 2087)
1
package org.txm.macroprototypes.urs.cqp
2

  
3
import java.io.IOException
4

  
5
import javax.xml.stream.XMLStreamException
6

  
7
import org.txm.importer.StaxIdentityParser
8
import org.txm.macro.urs.AnalecUtils
9
import org.txm.searchengine.cqp.CQPSearchEngine
10
import org.txm.searchengine.cqp.corpus.MainCorpus
11
import visuAnalec.elements.Unite
12

  
13
/**
 * StAX rewriter that inserts URS units as empty (milestone) elements next to the word
 * found at their start position.
 *
 * The element name is the unit "type" property and every other property is written as
 * an attribute. A unit is written before the opening tag of its word when its "position"
 * property equals "before", and after the closing tag when it equals "after"; units with
 * any other "position" value (or none) are not written.
 */
public class MileStoneInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	/** word id -> list of the units starting at that word */
	def id2Units = [:]

	/**
	 * @param corpus the corpus used to resolve corpus positions to word ids ("id" property)
	 * @param inputFile the XML file to rewrite
	 * @param units the units to insert; the list is sorted in place by start then end position
	 */
	public MileStoneInserter(MainCorpus corpus, File inputFile, List<Unite> units) {
		super(inputFile);
		this.inputFile = inputFile;

		this.units = units;
		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: a.getFin() <=> b.getFin() }

		int[] positions = new int[units.size()];
		for( int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getDeb();
		}

		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions);

		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]
			if (id != null) {
				if (!id2Units.containsKey(id)) id2Units[id] = []

				id2Units[id] << units[i]
			}
		}
	}

	/** true once the "text" element has been reached; words before it are ignored */
	boolean start = false;
	String word_id = null;

	/** Writes the "before" milestones of a word ahead of its opening tag. */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		if ("text".equals(localname)) {
			start = true;
		} else if ("w".equals(localname) && start) {
			word_id = getParserAttributeValue("id");
			if (word_id == null) {
				println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
			} else {
				writeAllUnits(word_id, "before")
			}
		}

		super.processStartElement();
	}

	/**
	 * Writes every unit registered for the given word whose "position" property equals
	 * the requested position.
	 *
	 * @param id the word id
	 * @param position "before" or "after"
	 */
	protected void writeAllUnits(String id, String position) {

		def units = id2Units[id]
		if (units == null) return; // no units to write

		for (Unite currentUnit : units) {

			// skip only this unit: returning here would drop the remaining units of the word
			if (!position.equals(currentUnit.getProp("position"))) continue;

			writer.writeStartElement(currentUnit.getProp("type"));
			HashMap props = currentUnit.getProps();
			for (String p : props.keySet()) {
				if (p.equals("type")) continue; // ignore the type since written in tag name
				writer.writeAttribute(p, ""+props.get(p));
			}
			writer.writeEndElement();
		}
	}

	/** Writes the "after" milestones of a word once its closing tag has been written. */
	@Override
	protected void processEndElement() throws XMLStreamException {
		super.processEndElement();

		if ("w".equals(localname) && start && word_id != null) {
			writeAllUnits(word_id, "after")
		}

		if ("w".equals(localname)) {
			word_id = null;
		}
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/cqp/UnitsInserter.groovy (revision 2087)
1
package org.txm.macroprototypes.urs.cqp
2

  
3
import java.io.IOException
4

  
5
import javax.xml.stream.XMLStreamException
6

  
7
import org.txm.importer.StaxIdentityParser
8
import org.txm.macro.urs.AnalecUtils
9
import org.txm.searchengine.cqp.CQPSearchEngine
10
import org.txm.searchengine.cqp.corpus.MainCorpus
11
import visuAnalec.elements.Unite
12

  
13
/**
 * StAX rewriter that inserts URS units as elements wrapping the words they cover.
 *
 * A unit is opened just before the &lt;w&gt; found at its start position and closed just
 * after the &lt;/w&gt; found at its end position. The element name is the unit "type"
 * property and every other property is written as an attribute.
 */
public class UnitsInserter extends StaxIdentityParser {

	List<Unite> units
	String[] ids
	File inputFile
	/** word id -> list of the units to open before that word */
	def open_id2Units = [:]
	/** word id -> set of the units to close after that word */
	def close_id2Units = [:]
	/** units currently open in the output, in opening order */
	def writing_units = []
	/** for each open unit (same index as writing_units), the path of the element it was opened in */
	def writing_stacks = []

	/** path of the element being parsed, e.g. "/TEI/text/p/w" */
	def stack = "";

	def writing_start, writing_end;

	def positions2id = [:] // used to relocate end of units

	/**
	 * @param corpus the corpus used to resolve corpus positions to word ids ("id" property)
	 * @param inputFile the XML file to rewrite
	 * @param units the units to insert; the list is sorted in place by start position, then
	 *        by decreasing end position so that enclosing units are opened first
	 * @param type the unit type (not used by this class)
	 */
	public UnitsInserter(MainCorpus corpus, File inputFile, List<Unite> units, String type) {
		super(inputFile);
		this.inputFile = inputFile;
		this.units = units;

		this.units.sort() { a, b -> a.getDeb() <=> b.getDeb() ?: b.getFin() <=> a.getFin() }

		// get words id limits
		int[] positions = new int[units.size()];
		for( int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getDeb();
		}
		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions);
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]

			if (id != null) {
				positions2id[positions[i]] = id
				if (!open_id2Units.containsKey(id)) open_id2Units[id] = []
				open_id2Units[id] << units[i]
			}
		}

		positions = new int[units.size()];
		for( int i = 0 ; i < units.size() ; i++) {
			positions[i] = units.get(i).getFin();
		}
		ids = CQPSearchEngine.getCqiClient().cpos2Str(corpus.getProperty("id").getQualifiedName(), positions);
		for (int i = 0 ; i < ids.length ; i++) {
			String id = ids[i]
			if (id != null) {
				positions2id[positions[i]] = id
				if (!close_id2Units.containsKey(id)) close_id2Units[id] = new HashSet<Unite>()
				close_id2Units[id] << units[i]
			}
		}
	}

	/** true once the "text" element has been reached; words before it are ignored */
	boolean start = false;
	String word_id = null;

	/** Opens the units starting at a word before writing its opening tag. */
	@Override
	protected void processStartElement() throws XMLStreamException, IOException {

		stack += "/"+localname

		if ("text".equals(localname)) {
			start = true;
		} else if ("w".equals(localname) && start) {
			word_id = getParserAttributeValue("id");
			if (word_id == null) {
				println "Warning: found <w> without id at line "+parser.getLocation().getLineNumber()+" in "+inputFile
			} else {
				writeOpenUnits()
			}
		}

		super.processStartElement();
	}

	/**
	 * Opens every unit starting at the current word. A unit that ends after an already
	 * open unit is also registered to be closed together with that unit.
	 */
	protected void writeOpenUnits() {

		def toWrite = open_id2Units[word_id]

		if (toWrite != null) {
			for (Unite unite : toWrite) {

				for (int i = 0 ; i < writing_units.size() ; i++) {
					Unite u = writing_units.get(i);
					if (unite.getFin() > u.getFin()) {
						// add unite to close_id2Units
						String id = positions2id[u.getFin()]
						// no word id known for that position: nothing to register (avoids a NPE)
						def closing = (id == null) ? null : close_id2Units[id]
						if (closing != null) closing << unite // close the unite at the same moment
					}
				}

				writeUnit(unite);
			}
		}
	}

	/**
	 * Closes open units. Inside a word (word_id set): the units registered to close at
	 * that word. Otherwise: the units that were opened in the element being closed.
	 */
	protected void writeCloseUnits() {
		if (word_id != null) {
			def toClose = close_id2Units[word_id]
			if (toClose != null) {
				for (int i = 0 ; i < writing_units.size() ; i++) {
					Unite u = writing_units.get(i);
					if (toClose.contains(u)) {
						writing_stacks.remove(i)
						writing_units.remove(i)
						writer.writeEndElement();
						i--;
					}
				}
			}
		} else {
			for (int i = 0 ; i < writing_stacks.size() ; i++) {
				if (writing_stacks[i].equals(stack)) {
					writing_stacks.remove(i)
					writing_units.remove(i)
					writer.writeEndElement();
					i--
				}
			}
		}
	}

	/**
	 * Writes the opening tag and the attributes of a unit and registers it as open.
	 *
	 * @param currentUnit the unit to open
	 */
	protected void writeUnit(Unite currentUnit) {

		writing_units << currentUnit
		// Remember the path of the element that will contain the unit (the current path
		// without its last step, i.e. the parent of the word being opened). The entries
		// are compared with the path string in writeCloseUnits() and processEndElement();
		// storing the Unite itself, as before, made those comparisons always false.
		int lastSlash = stack.lastIndexOf("/")
		writing_stacks << (lastSlash >= 0 ? stack.substring(0, lastSlash) : stack)

		// (a dangling "if (currentUnit.getDeb() > writing_start)" used to guard this call;
		// writing_start is never set and the test was always true, so it has been removed)
		writer.writeStartElement(currentUnit.getProp("type"));
		HashMap props = currentUnit.getProps();
		for (String p : props.keySet()) {
			if (p.equals("type")) continue; // ignore the type since written in tag name
			writer.writeAttribute(p, ""+props.get(p));
		}
	}

	/**
	 * Closes the units still open when the element they were opened in ends, writes the
	 * end tag, then closes the units ending at the word that has just been closed.
	 */
	@Override
	protected void processEndElement() throws XMLStreamException {

		if (writing_stacks.size() > 0 && writing_stacks[-1].equals(stack)) {
			writeCloseUnits()
		}

		super.processEndElement();

		// pop the closed element from the current path
		stack = stack.substring(0, stack.length() - localname.length() - 1);

		if ("w".equals(localname)) {
			if (start && word_id != null) {
				writeCloseUnits()
			}
			word_id = null;
		}
	}
}
tmp/org.txm.analec.rcp/src/org/txm/macroprototypes/urs/cqp/URSUnits2CQPWordsMacro.groovy (revision 2087)
1
package org.txm.macroprototypes.urs.cqp
2

  
3
import org.kohsuke.args4j.*
4
import groovy.transform.Field
5
import org.txm.annotation.urs.*
6
import org.txm.importer.ValidateXml
7
import org.txm.rcp.swt.widget.parameters.*
8
import org.txm.searchengine.cqp.corpus.*
9
import visuAnalec.elements.*
10

  
11
// BEGINNING OF PARAMETERS
12

  
13
// Open the parameters input dialog box
14
//if (!ParametersDialog.open(this)) return;
15

  
16
// This macro injects the properties of the URS units whose "type" property is "word"
// into the <w> elements of the XML-TXM files of the selected corpus.
// It only works on a MainCorpus selection.
if (!(corpusViewSelection instanceof MainCorpus)) {
	println "Selection must be a Corpus"
	return
}

MainCorpus corpus = corpusViewSelection
def analecCorpus = URSCorpora.getCorpus(corpus)

def texts = corpus.getCorpusTextIdsList();
def texts_startlimits = corpus.getTextStartLimits()
def texts_endlimits = corpus.getTextEndLimits()

File projectDirectory = corpus.getProject().getProjectDirectory()

// Best-effort move of the rejected copy to the "error" directory of the project.
def moveCopyToErrorDirectory = { File xmlFile, File copyFile ->
	File error = new File(projectDirectory, "error/"+xmlFile.getName())
	error.getParentFile().mkdirs()
	println "	moving created file to $error"
	error.delete()
	copyFile.renameTo(error)
}

for (int i = 0 ; i < texts.size() ; i++) {

	println "Processing annotations of "+texts[i]+"..."

	def text_id = texts[i]
	def text_start = texts_startlimits[i]
	def text_end = texts_endlimits[i]

	File xmltxmFile = new File(projectDirectory, "txm/"+corpus.getID()+"/"+text_id+".xml")
	File xmltxmFileCopy = new File(projectDirectory, text_id+"_copy.xml")

	if (!xmltxmFile.exists()) {
		println "Warning: no text file found: "+xmltxmFile
		continue
	}

	for (String unit_type : analecCorpus.getStructure().getTypes(Unite.class)) {
		ArrayList<Unite> all_units = analecCorpus.getUnites(unit_type)
		// "word" units located inside the current text and not written yet
		def corpus_units = all_units.findAll() {
			"word".equals(it.getProp("type")) && text_start <= it.getDeb() && it.getFin() < text_end && !("true".equals(it.getProp("written")))
		}

		if (corpus_units.size() == 0) continue;

		try {
			println "processing word Units ${text_id} and its units "+corpus_units.size()
			WordUnitsInserter inserter = new WordUnitsInserter(corpus, xmltxmFile, corpus_units);
			if (inserter.process(xmltxmFileCopy) && ValidateXml.test(xmltxmFileCopy)) {
				// Files.move(REPLACE_EXISTING) throws when the replacement fails, so the units
				// are never flagged "written" if the text file was not actually replaced
				// (the previous delete()+renameTo() sequence ignored both results).
				java.nio.file.Files.move(xmltxmFileCopy.toPath(), xmltxmFile.toPath(), java.nio.file.StandardCopyOption.REPLACE_EXISTING)

				for (Unite unit : corpus_units) {
					unit.getProps()["written"] = "true"
				}
				URSCorpora.saveCorpus(corpus);
				println "Done, "+corpus_units.size()+ " units written"

			} else {
				println "Error while processing word units $xmltxmFile file"
				moveCopyToErrorDirectory(xmltxmFile, xmltxmFileCopy)
			}
		} catch(Exception e) {
			println "Error while processing word units $xmltxmFile file: "+e
			e.printStackTrace();
			moveCopyToErrorDirectory(xmltxmFile, xmltxmFileCopy)
		}
	}
}
tmp/org.txm.analec.rcp/META-INF/MANIFEST.MF (revision 2087)
167 167
 org.txm.annotation.urs.view,
168 168
 org.txm.annotation.urs.widgets,
169 169
 org.txm.macro.urs,
170
 org.txm.macro.urs.cqp,
171 170
 org.txm.macro.urs.democrat,
172 171
 org.txm.macro.urs.edit,
173 172
 org.txm.macro.urs.exploit,
174 173
 org.txm.macro.urs.export,
174
 org.txm.macroprototypes.urs.cqp,
175 175
 org.txm.macroprototypes.urs.misc,
176 176
 visuAnalec,
177 177
 visuAnalec.chaines,
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/commands/StructuresIndexMacro.groovy (revision 2087)
1
package org.txm.macro.commands
2
// Copyright © 2017 ENS de Lyon, CNRS, University of Franche-Comté
3
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
4
// @author sheiden
5

  
6
/*
7
 Macro affichant les statistiques de différentes structures d'un corpus
8
 Paramètres de la macro :
9
 - corpus : le corpus sélectionné dans la vue Corpus
10
 - structures : liste des structures à interroger. Séparer les noms par une virgule.
11
 - structProperties : liste des propriétés de structures. Séparer les noms par une virgule.
12
 Il doit y avoir autant de propriétés de structures que de structures indiquées dans le paramètre structures.
13
 Les structures doivent posséder la propriété demandée.
14
 Ce paramètre peut être laissé vide, dans ce cas la colonne 'prop' n'est pas affichée.
15
 - query : requête CQL de sélection de mots exprimée obligatoirement en format complet : [...]
16
 Par exemple :
17
 - [frpos="N.*"] pour sélectionner les noms communs et les noms propres
18
 - [] pour sélectionner tous les mots
19
 - wordProperty : propriété de mot utilisée pour calculer le vocabulaire et les fréquences
20
 - displayIndex : calculer l'index hiérarchique des valeurs de la propriété wordProperty pour la requête query sur chaque structure
21
 - Vmax : nombre maximum des mots les plus fréquents à afficher dans l'index
22
 Résultat :
23
 Le résultat est un tableau TSV affiché dans la console.
24
 On peut l'exploiter avec un copier/coller dans Calc.
25
 Chaque ligne correspond à une structure du corpus.
26
 Les lignes sont ordonnées par ordre hiérarchique des structures du début à la fin du corpus.
27
 Les colonnes sont :
28
 - struct : nom de la structure
29
 - prop : valeur de la propriété de la structure
30
 (si le paramètre structProperties est vide, cette colonne est absente du résultat)
31
 - start : position du premier mot de la structure dans le corpus
32
 (les positions du corpus sont numérotées à partir de 0).
33
 Les colonnes start et end sont pratiques quand on n'a pas de propriété de structure à afficher pour se repérer dans le corpus.
34
 - end : position du dernier mot de la structure
35
 - T : taille de la structure (end-start)
36
 - t : nombre de mots sélectionnés dans la structure
37
 - v : nombre de valeurs différentes de la propriété des mots sélectionnés dans la structure
38
 - fmin : fréquence minimale des valeurs de la propriété de mots sélectionnés dans la structure
39
 - fmax : fréquence maximale des valeurs de la propriété de mots sélectionnés dans la structure
40
 - index : l'index hiérarchique des valeurs de la propriété de mot choisie des mots sélectionnés par la requête CQL
41
 Exemple de résultats sur le texte "Essais sur la peinture" de Diderot :
42
 struct	prop	start	end	T	t	v	fmin	fmax	index
43
 text	DiderotEssais	46203	56871	10668	2011	903	1	38	[nature, couleur, homme, tableau, lumière, objets, œil, toile, art, effet, corps, artiste, ombre, ombres, deux, peintre, peinture, dessin, couleurs, tête]
44
 div	0	46214	49223	3009	549	327	1	16	[nature, homme, modèle, figure, deux, école, artiste, chose, âge, figures, dessin, actions, fois, professeur, action, attitude, manière, femme, col, tête]
45
 p	0	46220	46259	39	5	5	1	1	[nature, forme, cause, êtres, un]
46
 p	1	46260	46456	196	36	25	1	3	[yeux, col, épaules, gorge, femme, jeunesse, nature, accroissement, orbe, paupières, cavité, absence, organe, sourcils, joues, lèvre, mouvement, altération, parties, visage]
47
 p	2	46457	46578	121	28	26	1	2	[pieds, nature, regards, homme, dos, poitrine, forme, cartilages, col, vertèbres, tête, mains, articulation, poignet, coudes, arrière, membres, centre, gravité, système]
48
 p	3	46579	46622	43	5	4	1	2	[causes, effets, êtres, imitation]
49
 p	4	46623	46727	104	22	20	1	2	[ignorance, règles, effets, causes, convention, suites, peine, artiste, imitation, nature, pieds, jambes, genoux, têtes, tact, observation, phénomènes, liaison, enchaînement, difformités]
50
 p	5	46728	46797	69	10	6	1	4	[nez, Antinoüs, nature, difformité, altérations, reste]
51
 p	6	46798	46859	61	9	7	1	2	[règles, nature, homme, rue, chose, statue, proportions]
52
 p	7	46860	46942	82	13	11	1	2	[extrémité, pied, voile, bossu, Venus, Medicis, nature, figure, crayons, monstre, chose]
53
 p	8	46943	46982	39	11	11	1	1	[figure, système, suites, inconséquence, principe, production, art, mille, lieues, œuvre, nature]
54
 p	9	46983	47196	213	38	30	1	5	[homme, figure, âge, fonctions, mystères, art, artiste, proportions, despotisme, nature, condition, sacrifice, cent, manières, organisation, habitude, facilité, grandeur, proportion, membre]
55
 ... [13 paragraphes] ...
56
 div	1	49224	52163	2939	531	307	1	23	[couleur, nature, chair, artiste, toile, art, homme, yeux, œil, couleurs, tableau, harmonie, effet, dessin, palette, organe, ton, coloriste, vie, ami]
57
 p	24	49230	49258	28	7	7	1	1	[C', dessin, forme, êtres, couleur, vie, souffle]
58
 p	25	49259	49284	25	6	6	1	1	[maîtres, art, juges, dessin, monde, couleur]
59
 p	26	49285	49354	69	16	16	1	1	[dessinateurs, coloristes, littérature, Cent, froids, orateur, Dix, orateurs, poète, intérêt, homme, Helvétius, dix, bons, peine, mort]
60
 p	27	49355	49485	130	24	21	1	2	[artiste, besoin, échelle, ami, atelier, teintes, demi-, palette, quart, heure, travail, ordre, pendant, passage, auteur, bureau, ligne, livre, place, allure]
61
 p	28	49486	49680	194	46	42	1	2	[yeux, toile, chaos, œuvre, sentiment, couleur, bouche, palette, image, pinceau, création, oiseaux, nuances, plumage, fleurs, velouté, arbres, verdures, azur, ciel]
62
 p	29	49681	49967	286	48	43	1	3	[nature, organe, homme, arbre, artistes, chose, monde, variété, coloristes, couleur, disposition, doute, œil, couleurs, tableau, effets, rouges, blancs, tapisserie, murs]
63
 p	30	49968	50068	100	20	17	1	3	[fois, organe, peintre, ouvrage, littérateur, caractère, disposition, pente, homme, voix, explosion, état, silence, artiste, tableau, couleur, coloris]
64
 p	31	50069	50105	36	7	7	1	1	[coup, organe, affection, corps, vapeur, nature, imitation]
65
 p	32	50106	50267	161	26	19	1	4	[couleur, palette, artiste, effet, tableau, teintes, couleurs, idée, endroit, fois, appréciation, scène, composition, manie, travail, teinte, composé, substances, unes]
66
 p	33	50268	50319	51	7	7	1	1	[général, harmonie, composition, peintre, effet, pinceau, couleur]
67
 ... [etc.]
68
 Avec les paramètres :
69
 - structures : text,div,p
70
 - structProperties : id,n,n
71
 - query : [frpos="N.*"]
72
 - wordProperty : word
73
 - displayIndex : true
74
 - Vmax : 20
75
 */
76

  
77
// Déclarations
78

  
79
import org.kohsuke.args4j.*
80

  
81
import groovy.transform.Field
82

  
83
import org.txm.rcp.swt.widget.parameters.*
84
import org.txm.Toolbox
85
import org.eclipse.ui.console.*
86
import org.txm.macro.cqp.*
87
import org.txm.searchengine.cqp.CQPSearchEngine
88
import org.txm.searchengine.cqp.corpus.CQPCorpus
89
import org.txm.searchengine.cqp.corpus.Partition
90
import org.txm.searchengine.cqp.corpus.Property
91
import org.txm.searchengine.cqp.corpus.QueryResult
92
import org.txm.searchengine.cqp.corpus.Subcorpus;
93
import org.txm.searchengine.cqp.corpus.query.CQLQuery
94
import org.txm.rcp.commands.*
95
import org.txm.statsengine.r.core.RWorkspace
96

  
97
// field selector passed to CQI.dumpSubCorpus() by the closures below
byte CQI_CONST_FIELD_MATCH = (byte) 0x10

def scriptName = this.class.getSimpleName()

// Collect the corpora to work on: selected corpora, plus the parts of selected partitions
def selection = []
for (def s : corpusViewSelections) {
	if (s instanceof CQPCorpus) selection << s
	else if (s instanceof Partition) selection.addAll(s.getParts())
}

if (selection.size() == 0) {
	println "** $scriptName: please select a Corpus or a Partition in the Corpus view: "+corpusViewSelections
	return false
}
println "WORKING WITH $selection"
// BEGINNING OF PARAMETERS

@Field @Option(name="structures", usage="act,scene", widget="String", required=true, def="text,div,p")
		def structures
@Field @Option(name="structProperties", usage="n,n", widget="String", required=false, def="id,n,n")
		def structProperties
@Field @Option(name="query", usage="[word!='\\p{P}']", widget="String", required=true, def="[pos=\"NOM.*\"|frpos=\"N.*\"]")
		def query
@Field @Option(name="wordProperty", usage="word", widget="String", required=true, def="word")
		def wordProperty
@Field @Option(name="displayIndex", usage="display a hierarchical index", widget="Boolean", required=true, def="true")
		def displayIndex
@Field @Option(name="Vmax", usage="size of index", widget="Integer", required=false, def="20")
		def Vmax
// END OF PARAMETERS

// Open the parameters input dialog box
if (!ParametersDialog.open(this)) return;

def CQI = CQPSearchEngine.getCqiClient()

def corpusStructs = structures.split(",") 			// ["act", "scene"]
// structProperties is optional: treat a missing value like an empty one
structProperties = (structProperties ?: "").trim()

if (structProperties.size() > 0) {
	propParam = true
	corpusStructPropNames = structProperties.split(",")	// ["n", "n"]
	if (corpusStructPropNames.size() != corpusStructs.size()) {
		// transpose() below silently drops the entries that have no counterpart
		println "** $scriptName: warning, "+corpusStructs.size()+" structures but "+corpusStructPropNames.size()+" structure properties: there must be one property per structure"
	}
	// structure name -> structure property name
	corpusStructProps = [corpusStructs, corpusStructPropNames].transpose().collectEntries()
} else {
	propParam = false
}
143

  
144
// First define the order theory over corpus structures intervals
145
// by defining a binary comparator that will be used to build the
146
// TreeSet of intervals
147

  
148
// Prints the hierarchical index (value -> frequency, most frequent first) of a query.
//  c   : the corpus to query
//  q   : the CQL query
//  p   : the name of the word property to count
//  cut : when > 0, only the 'cut' most frequent values are printed
// The query result is stored in a temporary "RES1" subcorpus that is always deleted.
def print_index = { c, q, p, cut ->

	QueryResult qr = c.query(new CQLQuery(q), "RES1", false);
	Subcorpus subcorpus = c.createSubcorpus("RES1", qr);
	def deleted
	try {
		p = subcorpus.getProperty(p)
		def tC = subcorpus.getSize()
		if (tC == 0) {
			// nothing to dump: avoid requesting the invalid range 0..-1
			println([:])
		} else {
			def matches_target_p = CQI.cpos2Str(p.getQualifiedName(), CQI.dumpSubCorpus(qr.getQualifiedCqpId(), CQI_CONST_FIELD_MATCH, 0, tC-1))
			def ranked = matches_target_p.countBy { it }.sort { -it.value }
			if (cut > 0) {
				println ranked.take(cut)
			} else {
				println ranked
			}
		}
	} finally {
		// delete the temporary subcorpus even when the computation fails
		deleted = subcorpus.delete()
	}
	return deleted
}
163

  
164
// Prints the statistics of the index of a query as tab-separated values, without the
// leading columns: number of matches (t), number of distinct values (v), minimum and
// maximum frequency, then the list of the most frequent values when displayIndex is set.
//  c : the corpus to query
//  q : the CQL query
//  p : the name of the word property to count
// The query result is stored in a temporary "RES1" subcorpus that is always deleted.
def print_freq = { CQPCorpus c, q, p ->

	// call the search engine
	QueryResult qr = c.query(new CQLQuery(q), "RES1", false);
	Subcorpus subcorpus = c.createSubcorpus("RES1", qr);
	def deleted
	try {
		p = subcorpus.getProperty(p)
		int csize = c.getSize()
		// number of matches; an empty corpus cannot have any
		def tC = (csize == 0) ? 0 : CQI.subCorpusSize(subcorpus.getQualifiedCqpId())

		if (tC == 0) {
			// Empty corpus or no match: print an all-zero row. Testing only the corpus size,
			// as before, let a query without match request the invalid range 0..-1 and
			// compute the min/max of an empty collection.
			if (displayIndex) {
				println "0\t0\t0\t0\t[]"
			} else {
				println "0\t0\t0\t0"
			}
		} else {
			// property value codes of the matches
			def matches_target_p = CQI.cpos2Id(p.getQualifiedName(), CQI.dumpSubCorpus(subcorpus.getQualifiedCqpId(), CQI_CONST_FIELD_MATCH, 0, tC-1))

			// compute the frequency of each value
			def index = matches_target_p.collect { it }.countBy { it }
			def freqs = index.values()

			def v = freqs.size()
			def fmin = freqs.min()
			def fmax = freqs.max()
			print sprintf("%d\t%d\t%d\t%d", tC, v, fmin, fmax)

			// print the values of the most frequent codes
			if (displayIndex) {
				def ranked = index.sort { -it.value }
				// Vmax is optional: print the whole index when it is not set
				def heads = (Vmax != null ? ranked.take(Vmax) : ranked).keySet()
				println "\t"+heads.collect { CQI.id2Str(p.getQualifiedName(), it)[0] }
			} else {
				println ""
			}
		}
	} finally {
		// delete the temporary subcorpus even when the computation fails
		deleted = subcorpus.delete()
	}
	return deleted
}
218

  
219
// R workspace used at the end of the script to evaluate the ggplot2 bar chart script
def r = RWorkspace.getRWorkspaceInstance()
220

  
221
/**
 * Select the units that are fully included in a CQP match.
 *
 * Units are ordered by start then end before processing, so that units and
 * matches can be walked together in a single pass.
 * NOTE(review): matches are not sorted here; they are presumably already in
 * ascending position order - confirm against the callers.
 *
 * @param allUnites list of [start, end] position pairs
 * @param matches list of objects exposing getStart() and getEnd()
 * @return the units whose [start, end] interval lies inside a match
 */
static def inter(def allUnites, def matches) {
	//println allUnites.collect() {it -> it[0]}
	allUnites = allUnites.sort() { a, b -> a[0] <=> b[0] ?: a[1] <=> b[1] }
	//println allUnites.collect() {it -> it[0]}
	def unitsSize = allUnites.size()
	def iCurrentUnit = 0
	def selectedUnits = []

	def matchesSize = matches.size()
	def iCurrentMatch = 0

	while (iCurrentMatch < matchesSize && iCurrentUnit < unitsSize) {

		def unit = allUnites[iCurrentUnit]
		def match = matches[iCurrentMatch]
		if (unit[1] < match.getStart()) {
			// the unit ends before the match: try the next unit
			iCurrentUnit++
		} else if (unit[0] > match.getEnd()) {
			// the unit starts after the match: try the next match
			iCurrentMatch++
		} else {
			// the unit overlaps the match: keep it only when it is fully included
			if (match.getStart() <= unit[0] && unit[1] <= match.getEnd()) {
				selectedUnits << unit
			}

			iCurrentUnit++
		}
	}
	return selectedUnits
}
262

  
263
// For each selected corpus: build the ordered set of structure intervals included in
// the corpus matches, print one statistics line per structure, write the header of the
// d3 HTML page and draw the bar chart of the 'div' lengths with R/ggplot2.
selection.each { corpus ->

	corpusName = corpus.getID()
	mainCorpusName = corpus.getMainCorpus().getID()
	println "Corpus = "+corpusName
	println "Corpus QualifiedCqpId = "+corpus.getCqpId()
	println "MainCorpus = "+mainCorpusName
	println "Corpus QualifiedCqpId = "+corpus.getMainCorpus().getCqpId()

	// keep the structures (names without '_', 'txmcorpus' excluded) requested in the parameters
	def struct_names = (CQI.corpusStructuralAttributes(corpus.getMainCorpus().getCqpId()) as List)
	struct_names.removeAll { it.contains('_') }
	struct_names=(struct_names-"txmcorpus").grep(corpusStructs)
	//println "struct_names = "+struct_names

	if (struct_names.size() == 0) {
		println "** Impossible to find the structures (${corpusStructs}), aborting."
		return
	}

	// Now build the TreeSet of corpus structures intervals

	def h = new TreeSet<Struct>()

	struct_names.each {
		def matches = []
		// half-open range: '0..n-1' would be the reverse range [0, -1] when n == 0
		for (i in 0..<CQI.attributeSize("${mainCorpusName}.${it}")) {
			def (start, end) = CQI.struc2Cpos("${mainCorpusName}.${it}", i)
			matches << [start, end]
			//println sprintf("Adding %s[%d, %d]", it, start, end)
		}
		// keep only the structures included in the matches of the selected corpus
		def intersection = inter(matches, corpus.getMatches())
		for (def item : intersection)
			h.add(new Struct(it, item[0], item[1]))
	}

	if (propParam) {
		print sprintf("struct\tprop\tstart\tend\tT\tt\tv\tfmin\tfmax")
	} else {
		print sprintf("struct\tstart\tend\tT\tt\tv\tfmin\tfmax")
	}

	if (displayIndex) {
		println sprintf("\tindex")
	} else {
		println ""
	}

	// HOME may be undefined (e.g. on Windows): fall back to the JVM user home
	def home = System.getenv("HOME") ?: System.getProperty("user.home")
	def localPath = home+"/Documents/d3test"
	new File(localPath).mkdirs()

	// reset output file
	def resultFile = new File(localPath, "desc-partition.html")
	resultFile.text = ""

	resultFile << '''\
<!DOCTYPE html>
<html>
  <head>
    <meta http-equiv="Content-Type" content="text/html;charset=utf-8" charset="UTF-8"/>
    <link type="text/css" rel="stylesheet" href="style.css"/>
    <script type="text/javascript" src="d3/d3.v3.js" charset="utf-8"></script>
    <script type="text/javascript" src="d3/layout/partition.js" charset="utf-8"></script>
    <style type="text/css">

.chart {
  display: block;
  margin: auto;
  margin-top: 60px;
  font-size: 11px;
}

rect {
  stroke: #eee;
  fill: #aaa;
  fill-opacity: .8;
}

rect.parent {
  cursor: pointer;
  fill: steelblue;
}

text {
  pointer-events: none;
}

    </style>
  </head>
  <body>
    <div id="body">
      <div id="footer">
        Structures hierarchy
        <div class="hint">click or shift-alt-click to zoom-in or out</div>
      </div>
    </div>
    <script type="text/javascript">

var w = 1120,
    h = 600,
    x = d3.scale.linear().range([0, w]),
    y = d3.scale.linear().range([0, h]);

var vis = d3.select("#body").append("div")
    .attr("class", "chart")
    .style("width", w + "px")
    .style("height", h + "px")
  .append("svg:svg")
    .attr("width", w)
    .attr("height", h);

var partition = d3.layout.partition()
    .value(function(d) { return d.size; }).sort(null);

var tree = `{'''
	// NOTE(review): only the beginning of the HTML page is written; the tree data and the
	// end of the page are not produced by this code.

	// Now iterate on the TreeSet to get a depth first search on the structure intervals

	def rec_struct_regex = /([^0-9]+)[0-9]+/

	/*
	 "name": "sha-hamlet",
	 "children": [
	 {
	 "name": "sha-hamcast",
	 "children": [
	 {
	 "name": "sha-ham1",
	 "children": [
	 {"name": "sha-ham102", "size": 855},
	 {"name": "sha-ham103", "size": 464},
	 {"name": "sha-ham104", "size": 296},
	 {"name": "sha-ham105", "size": 635}
	 ]
	 }
	 ]
	 }
	 ]
	 }`;
	 */

	// debugging helper, currently not called (see the commented call below)
	def displayTree = { head ->
		if (head) {
			subtree = h.tailSet(head)
			subtree.each { print sprintf("%s[%d, %d], ", it.name, it.start, it.end) }
			println ""
			if (subtree.size() == 0) {
				println sprintf("%s[%d, %d]", head.name, head.start, head.end)
			} else {
				displayTree(subtree)
			}
		}
	}

	//displayTree(h.first())

	// property values and lengths of the 'div' structures, grouped by 'text' structure
	def divPropVals = []
	def divLengths = []
	def textDivPropVals = []
	def textDivLengths = []

	h.each {

		//println sprintf("Displaying %s[%d, %d]", it.name, it.start, it.end)
		if (propParam) {

			// recursive structures are named <name><level>: strip the trailing digits
			def istruct_name
			def rec_match = (it.name =~ rec_struct_regex)
			if (rec_match.size() == 1) {
				println "Rec struct match = "+rec_match[0][1]
				istruct_name = rec_match[0][1]
			} else {
				//println "Struct match = "+it.name
				istruct_name = it.name
			}

			def struct_name = "${mainCorpusName}.${istruct_name}_${corpusStructProps[it.name]}"
			def propVal = CQI.struc2Str(struct_name, CQI.cpos2Struc(struct_name, [it.start] as int[]))[0]
			// '<<' always appends; List.push() adds at the front since Groovy 2.5
			if (it.name == "text") {
				// a new text starts: store the divs collected for the previous one
				textDivPropVals << divPropVals
				divPropVals = []
				textDivLengths << divLengths
				divLengths = []
			} else if (it.name == "div") {
				divPropVals << propVal
				divLengths << (it.end-it.start)
			}

			print sprintf("%s\t%s\t%d\t%d\t%d\t", it.name, propVal, it.start, it.end, it.end-it.start)
		} else {
			print sprintf("%s\t%d\t%d\t%d\t", it.name, it.start, it.end, it.end-it.start)
		}
		print_freq(corpus, sprintf("a:%s :: a>=%d & a<=%d", query, it.start, it.end), wordProperty)
	}

	// store the divs of the last text, then drop the first entry (pushed when the first text was met)
	textDivPropVals << divPropVals
	textDivPropVals.remove(0)
	textDivLengths << divLengths
	textDivLengths.remove(0)

	println textDivPropVals
	println textDivLengths

	if (textDivPropVals.isEmpty()) {
		// nothing was collected (no 'text' structure, or no structure property given):
		// there is no vector to send to R
		println "** No 'div' lengths collected per 'text' structure, the graphic is not drawn."
		return
	}

	// only the divs of the first text are drawn
	def textDivPropVals1 = textDivPropVals[0] as String[]
	r.addVectorToWorkspace("textDivPropVals1", textDivPropVals1)
	def textDivLengths1 = textDivLengths[0] as int[]
	r.addVectorToWorkspace("textDivLengths1", textDivLengths1)

	def PNGFile = File.createTempFile("txm", ".png", new File(Toolbox.getTxmHomePath(), "results"))
	def PNGFilePath = PNGFile.getAbsolutePath()
	println "PNG file: "+PNGFilePath

	def SVGFile = File.createTempFile("txm", ".svg", new File(Toolbox.getTxmHomePath(), "results"))
	def SVGFilePath = SVGFile.getAbsolutePath()
	println "SVG file: "+SVGFilePath

	/// BEGINNING OF R SCRIPT
	def script ="""
df <- data.frame(structure=textDivPropVals1,
                 longueur=textDivLengths1)
p<-ggplot(data=df, aes(x=structure, y=longueur)) +
  geom_bar(stat="identity", fill="steelblue") +
  geom_text(aes(label=longueur), vjust=1.6, color="white", size=3.5) +
  labs(title="${corpusName}", x="Structure div", y = "Longueur") +
  theme_minimal()
"""
	/// END OF R SCRIPT

	// execute R script
	try {
		r.eval("library(ggplot2)")
		try {
			r.eval(script+"ggsave(file=\"${PNGFilePath}\", plot=p)")
			r.eval(script+"ggsave(file=\"${SVGFilePath}\", plot=p)")

			//display the SVG results graphic
			monitor.syncExec(new Runnable() {
						@Override
						public void run() { try { OpenSVGGraph.OpenSVGFile(SVGFilePath, "Longueur des structures de "+corpusName) } catch(Exception e) {e.printStackTrace()} }
					})
		} catch (Exception e) {
			println "** Error: "+e
		}
	} catch (Exception e) {
		// only the loading of the library reaches this handler: script errors are caught above
		println "** The 'ggplot2' R package is not installed. Start R ("+RWorkspace.getExecutablePath()+") and run 'install.packages(\"ggplot2\");'."
	}
}
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/commands/Struct.groovy (revision 2087)
1
package org.txm.macro.commands;
2

  
3
/**
 * A named corpus structure interval [start, end].
 *
 * The natural order lists the intervals in depth-first order of their
 * inclusion hierarchy: an interval comes before the intervals it contains.
 */
class Struct implements Comparable<Struct> {

	String  name
	Integer start
	Integer end

	/**
	 * @param n name of the structure
	 * @param s first position of the interval
	 * @param e last position of the interval
	 */
	Struct(String n, Integer s, Integer e) {
		name  = n
		start = s
		end   = e
	}

	/**
	 * Orders the intervals by increasing start; intervals with the same start
	 * are ordered by decreasing end, so that a containing interval comes before
	 * the contained one; intervals with the same start and end are ordered by
	 * the lexicographic order of the structure names.
	 *
	 * @param s the structure to compare to
	 * @return a negative value if this structure comes first, a positive value
	 *         if s comes first, 0 if both have the same interval and name
	 */
	@Override
	public int compareTo(Struct s) {
		// with the same start, the shorter interval must come AFTER the longer one:
		// answering "before" in both directions breaks the comparator contract
		return start <=> s.start ?: s.end <=> end ?: name <=> s.name
	}

	/**
	 * @return this structure formatted as name[start, end]
	 */
	@Override
	public String toString() {
		return sprintf("%s[%d, %d]", name, start, end)
	}

	/**
	 * @param s the structure to format
	 * @return s formatted as name[start, end]
	 */
	public toString(Struct s) {
		sprintf("%s[%d, %d]", s.name, s.start, s.end)
	}

	/**
	 * Prints s formatted as name[start, end].
	 *
	 * @param s the structure to print
	 */
	public print(Struct s) {
		print(s.toString())
	}
}

Also available in: Unified diff