Révision 2633
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/prototypes/commands/TreeMapDescriptionMacro.groovy (revision 2633) | ||
---|---|---|
1 |
package org.txm.macro.cqp |
|
2 |
// Copyright © 2016 ENS de Lyon |
|
3 |
// |
|
4 |
// Authors: |
|
5 |
// - Serge Heiden |
|
6 |
// |
|
7 |
// Licence: |
|
8 |
// This file is part of the TXM platform. |
|
9 |
// The TXM platform is free software: you can redistribute it |
|
10 |
// and/or modify it under the terms of the GNU General Public |
|
11 |
// License as published by the Free Software Foundation, |
|
12 |
// either version 2 of the License, or (at your option) any |
|
13 |
// later version. |
|
14 |
// |
|
15 |
// The TXM platform is distributed in the hope that it will be |
|
16 |
// useful, but WITHOUT ANY WARRANTY; without even the implied |
|
17 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
|
18 |
// PURPOSE. See the GNU General Public License for more |
|
19 |
// details. |
|
20 |
// |
|
21 |
// You should have received a copy of the GNU General |
|
22 |
// Public License along with the TXM platform. If not, see |
|
23 |
// http://www.gnu.org/licenses. |
|
24 |
// |
|
25 |
// Version: |
|
26 |
// $LastChangedDate: 2014-11-01 16:00:01 +0100 (sam., 1 nov. 2014) $ |
|
27 |
// $LastChangedRevision: XXXX $ |
|
28 |
// $LastChangedBy: sheiden $ |
|
29 |
// |
|
30 |
|
|
31 |
//FR: |
|
32 |
// Macro affichant les statistiques de différentes structures d'un corpus |
|
33 |
// |
|
34 |
// Exemple de résultat : |
|
35 |
// struct prop start end t v fmin fmax |
|
36 |
// act I 153 5824 5672 1396 1 400 |
|
37 |
// scene I.I 155 2415 2261 736 1 159 |
|
38 |
// scene I.II 2416 3216 686 382 1 15 |
|
39 |
// scene I.III 3217 5824 2206 775 1 78 |
|
40 |
// act II 5825 13444 6397 1667 1 199 |
|
41 |
// scene II.I 5827 8017 1855 786 1 47 |
|
42 |
// scene II.II 8018 8754 601 267 1 19 |
|
43 |
// scene II.III 8755 11898 2642 893 1 88 |
|
44 |
// scene II.IV 11899 12490 496 232 1 13 |
|
45 |
// scene II.V 12491 13444 801 371 1 39 |
|
46 |
// ... |
|
47 |
// |
|
48 |
// Pour le corpus SHAKESPEARE (All's Well That Ends Well) avec les paramètres : |
|
49 |
// - corpus : SHAKESPEARE |
|
50 |
// - structures : act,scene |
|
51 |
// - structProperties : n,n |
|
52 |
// - query : [word!='\p{P}'] |
|
53 |
// - wordProperty : word |
|
54 |
// |
|
55 |
// Paramètres de la macro : |
|
56 |
// - corpus : nom du corpus à interroger |
|
57 |
// - structures : liste des structures à interroger. Séparer les noms par une virgule |
|
58 |
// - structProperties : liste des propriétés de structures. Séparer les noms par une virgule. |
|
59 |
// Il doit y avoir autant de propriétés de structures que de structures indiquées dans le paramètre structure. |
|
60 |
// Les structures doivent posséder la propriété demandée. |
|
61 |
// Ce paramètre peut être laissé vide, dans ce cas la colonne 'prop' n'est pas affichée. |
|
62 |
// - query : requête CQL de mot exprimée obligatoirement en format complet : [...] |
|
63 |
// - wordProperty : propriété de mot utilisée pour calculer le vocabulaire et les fréquences |
|
64 |
// |
|
65 |
// Résultat : |
|
66 |
// Le résultat est un tableau TSV affiché dans la console. |
|
67 |
// On peut l'exploiter avec un copier/coller dans Calc. |
|
68 |
// Chaque ligne correspond à une structure du corpus. |
|
69 |
// Les lignes sont ordonnées par ordre hiérarchique des structures. |
|
70 |
// Les colonnes sont : |
|
71 |
// - struct : nom de la structure |
|
72 |
// - prop : valeur de la propriété de la structure |
|
73 |
// (si le paramètre structProperties est vide, cette colonne est absente du résultat) |
|
74 |
// - start : position du premier mot de la structure dans le corpus |
|
75 |
// (les positions du corpus sont numérotées à partir de 0 - la position du premier mot du corpus est 0). |
|
76 |
// Les colonnes start et end sont pratiques quand on n'a pas de propriété de structure à afficher pour se repérer dans le corpus. |
|
77 |
// - end : position du dernier mot de la structure |
|
78 |
// - t : nombre de mots de la structure |
|
79 |
// - v : nombre de valeurs différentes de la propriété de mot dans la structure |
|
80 |
// - fmin : fréquence minimale des valeurs de la propriété de mot dans la structure |
|
81 |
// - fmax : fréquence maximale des valeurs de la propriété de mot dans la structure |
|
82 |
|
|
83 |
// Déclarations |
|
84 |
|
|
85 |
import org.kohsuke.args4j.* |
|
86 |
import groovy.transform.Field |
|
87 |
import org.txm.rcpapplication.swt.widget.parameters.* |
|
88 |
|
|
89 |
import org.txm.Toolbox |
|
90 |
|
|
91 |
import org.eclipse.ui.console.* |
|
92 |
|
|
93 |
// Field selector handed to corpusEngine.dumpSubCorpus() by the query helpers below
byte CQI_CONST_FIELD_MATCH = 0x10 as byte

// BEGINNING OF PARAMETERS

// Name of the corpus to query
@Field
@Option(name="corpus", usage="SHAKESPEARE", widget="String", required=true, def="SHAKESPEARE")
def corpus

// Comma-separated list of the structures to describe
@Field
@Option(name="structures", usage="act,scene", widget="String", required=true, def="act,scene")
def structures

// Comma-separated list of structure properties, one per structure; may be left empty
@Field
@Option(name="structProperties", usage="n,n", widget="String", required=false, def="n,n")
def structProperties

// CQL word query, written in full form: [...]
@Field
@Option(name="query", usage="[word!='\\p{P}']", widget="String", required=true, def="[word!='\\p{P}']")
def query

// Word property used to compute the vocabulary and the frequencies
@Field
@Option(name="wordProperty", usage="word", widget="String", required=true, def="word")
def wordProperty

// Whether the most frequent values are listed at the end of each row
@Field
@Option(name="displayIndex", usage="display a hierarchical index", widget="Boolean", required=true, def="true")
def displayIndex

// Number of values listed when displayIndex is set
@Field
@Option(name="Vmax", usage="size of index", widget="Integer", required=false, def="20")
def Vmax

// Open the parameters input dialog box; leave the macro when open() returns false
if (!ParametersDialog.open(this)) {
    return
}
|
120 |
|
|
121 |
// END OF PARAMETERS |
|
122 |
|
|
123 |
// Closure clearing the first console known to the Eclipse console manager
def clearConsole = { ->
    def consoleManager = ConsolePlugin.getDefault().getConsoleManager()
    def firstConsole = consoleManager.getConsoles()[0]
    firstConsole.clearConsole()
}

// Clear the console before anything is printed by this macro
clearConsole()
|
129 |
|
|
130 |
// CQi client obtained from the TXM Toolbox; every corpus request below goes through it
def corpusEngine = Toolbox.getCqiClient()

def corpusName = corpus // "SHAKESPEARE"

// Requested structure names; trimmed so that "act, scene" behaves like "act,scene"
def corpusStructs = structures.split(",").collect { it.trim() }.findAll { it } // ["act", "scene"]

// structProperties is an optional parameter: treat a missing value like an empty one
structProperties = (structProperties ?: "").trim()

// propParam tells the rest of the script whether the 'prop' column is displayed;
// corpusStructProps maps each requested structure name to its property name
def propParam = structProperties.size() > 0
def corpusStructProps = [:]

if (propParam) {
    def corpusStructPropNames = structProperties.split(",").collect { it.trim() } // ["n", "n"]
    // The macro requires exactly one property per structure; transpose() would
    // otherwise silently drop the structures left without a property
    if (corpusStructPropNames.size() != corpusStructs.size()) {
        println "** The number of structure properties (${corpusStructPropNames.size()}) must match the number of structures (${corpusStructs.size()}), aborting."
        return
    }
    corpusStructProps = [corpusStructs, corpusStructPropNames].transpose().collectEntries()
}

// Keep the structural attributes of the corpus that are structures (no '_' in the
// name), are not the "txmcorpus" structure, and were requested by the user
def struct_names = (corpusEngine.corpusStructuralAttributes(corpusName) as List).findAll {
    !it.contains('_') && it != "txmcorpus" && (it in corpusStructs)
}
//println "struct_names = "+struct_names

if (struct_names.size() == 0) {
    println "** Impossible to find the structures (${corpusStructs}), aborting."
    return
}
|
155 |
|
|
156 |
/* |
|
157 |
class Node { |
|
158 |
|
|
159 |
String name |
|
160 |
Integer start |
|
161 |
Integer end |
|
162 |
Node parent |
|
163 |
List<Node> children |
|
164 |
|
|
165 |
Node(String n, Integer s, Integer e) { |
|
166 |
name = n |
|
167 |
start = s |
|
168 |
end = e |
|
169 |
children = new LinkedList<Node>() |
|
170 |
} |
|
171 |
|
|
172 |
public int compareTo(Node n) { |
|
173 |
|
|
174 |
/* Possible combinations |
|
175 |
this = [] |
|
176 |
n = {} |
|
177 |
an interval is not empty |
|
178 |
|
|
179 |
[ { } ] -> this is parent 1 |
|
180 |
[ { }] -> this is parent 1 |
|
181 |
[ { ] } -> *overlap 0 |
|
182 |
[ ]{ } -> this is left sibling 2 |
|
183 |
[ ] { } -> this is left sibling 2 |
|
184 |
[{ } ] -> this is parent 1 |
|
185 |
[{ }] -> *duplicate 0 |
|
186 |
[{ ] } -> this is child -1 |
|
187 |
|
|
188 |
{ [ ] } -> n is parent -1 |
|
189 |
{ [ ]} -> n is parent -1 |
|
190 |
{ [ } ] -> *overlap 0 |
|
191 |
{ }[ ] -> n is left sibling -2 |
|
192 |
{ } [ ] -> n is left sibling -2 |
|
193 |
{[ ] } -> n is parent -1 |
|
194 |
{[ ]} -> *duplicate 0 |
|
195 |
{[ } ] -> n is child 1 |
|
196 |
|
|
197 |
// |
|
198 |
|
|
199 |
if (start < n.start) { |
|
200 |
// [ { } ] -> this is parent 1 |
|
201 |
// [ { }] -> this is parent 1 |
|
202 |
// [ { ] } -> *overlap 0 |
|
203 |
// [ ]{ } -> this is left sibling 2 |
|
204 |
// [ ] { } -> this is left sibling 2 |
|
205 |
if (end > n.end) { |
|
206 |
// [ { } ] -> this is parent 1 |
|
207 |
return 1 |
|
208 |
} else if (end == n.end) { |
|
209 |
// [ { }] -> this is parent 1 |
|
210 |
return 1 |
|
211 |
} else if (end < n.end && end > n.start) { |
|
212 |
// [ { ] } -> *overlap 0 |
|
213 |
println "** Error: overlapping intervals, [ { ] } should not happen, "+this.toString()+", "+n.toString() |
|
214 |
return 0 |
|
215 |
} else if (end == n.start) { |
|
216 |
// [ ]{ } -> this is left sibling 2 |
|
217 |
return 2 |
|
218 |
} else if (end < n.start) { |
|
219 |
// [ ] { } -> this is left sibling 2 |
|
220 |
return 2 |
|
221 |
} else { |
|
222 |
// should not happen |
|
223 |
println "** Error: should not happen, "+this.toString()+", "+n.toString() |
|
224 |
return -10 |
|
225 |
} |
|
226 |
} else if (start > n.start) { |
|
227 |
// { [ ] } -> n is parent -1 |
|
228 |
// { [ ]} -> n is parent -1 |
|
229 |
// { [ } ] -> *overlap 0 |
|
230 |
// { }[ ] -> n is left sibling -2 |
|
231 |
// { } [ ] -> n is left sibling -2 |
|
232 |
if (end < n.end) { |
|
233 |
// { [ ] } -> n is parent -1 |
|
234 |
return -1 |
|
235 |
} else if (end == n.end) { |
|
236 |
// { [ ]} -> n is parent -1 |
|
237 |
return -1 |
|
238 |
} else if (end > n.end && n.end > start) { |
|
239 |
// { [ } ] -> *overlap 0 |
|
240 |
println "** Error: overlapping intervals, { [ } ] should not happen, "+this.toString()+", "+n.toString() |
|
241 |
return 0 |
|
242 |
} else if (n.end == start) { |
|
243 |
// { }[ ] -> n is left sibling -2 |
|
244 |
return -2 |
|
245 |
} else if (n.end < start) |
|
246 |
// { } [ ] -> n is left sibling -2 |
|
247 |
return -2 |
|
248 |
} else { |
|
249 |
// should not happen |
|
250 |
println "** Error: should not happen, "+this.toString()+", "+n.toString() |
|
251 |
return -10 |
|
252 |
} |
|
253 |
} else { |
|
254 |
// [{ } ] -> this is parent 1 |
|
255 |
// [{ }] -> *duplicate 0 |
|
256 |
// [{ ] } -> this is child -1 |
|
257 |
// {[ ] } -> n is parent -1 |
|
258 |
// {[ ]} -> *duplicate 0 |
|
259 |
// {[ } ] -> n is child 1 |
|
260 |
if (end > n.end) { |
|
261 |
// [{ } ] -> this is parent 1 |
|
262 |
// {[ } ] -> n is child 1 |
|
263 |
return 1 |
|
264 |
} else if (end < n.end) { |
|
265 |
// [{ ] } -> this is child -1 |
|
266 |
// {[ ] } -> n is parent -1 |
|
267 |
return -1 |
|
268 |
} else if (end == n.end) { |
|
269 |
// [{ }] -> *duplicate 0 |
|
270 |
// {[ ]} -> *duplicate 0 |
|
271 |
println "** Error: duplicate intervals, [{ }] should not happen, "+this.toString()+", "+n.toString() |
|
272 |
return 0 |
|
273 |
} else { |
|
274 |
// should not happen |
|
275 |
println "** Error: should not happen, "+this.toString()+", "+n.toString() |
|
276 |
return -10 |
|
277 |
} |
|
278 |
} |
|
279 |
} |
|
280 |
|
|
281 |
public Node add(Node n) { |
|
282 |
|
|
283 |
switch (this.compareTo(n)) { |
|
284 |
|
|
285 |
case 1: |
|
286 |
|
|
287 |
|
|
288 |
childNode = new Node(n, s, e) |
|
289 |
childNode.parent = this |
|
290 |
this.children.add(childNode) |
|
291 |
return childNode |
|
292 |
} |
|
293 |
|
|
294 |
public toString(Node n) { |
|
295 |
sprintf("%s[%d, %d]", n.name, n.start, n.end) |
|
296 |
} |
|
297 |
|
|
298 |
public print(Node n) { |
|
299 |
print(n.toString()) |
|
300 |
} |
|
301 |
} |
|
302 |
|
|
303 |
*/ |
|
304 |
|
|
305 |
// First define the order theory over corpus structures intervals |
|
306 |
// by defining a binary comparator that will be used to build the |
|
307 |
// TreeSet of intervals |
|
308 |
|
|
309 |
/**
 * A corpus structure occurrence: a structure name and the interval of corpus
 * positions [start, end] it covers.
 *
 * The natural order lists intervals in depth-first (pre-order) sequence:
 *   1. the interval starting on the left comes first:        [ { ...
 *   2. same start, the longer (enclosing) one comes first:   [{ } ]
 *   3. same start and same end: lexicographic order of the structure names.
 * Two structures compare as equal only when name, start and end are all equal,
 * so a TreeSet keeps one entry per distinct structure occurrence.
 */
class Struct implements Comparable<Struct> {

    String name
    Integer start
    Integer end

    /**
     * @param n structure name
     * @param s corpus position of the first word of the structure
     * @param e corpus position of the last word of the structure
     */
    Struct(String n, Integer s, Integer e) {
        name = n
        start = s
        end = e
    }

    /**
     * Compares by start (ascending), then by end (descending), then by name.
     *
     * @param s the structure to compare with
     * @return a negative value when this structure comes first, a positive
     *         value when it comes after, 0 when both are the same occurrence
     */
    @Override
    public int compareTo(Struct s) {
        if (start != s.start) {
            return start < s.start ? -1 : 1
        }
        if (end != s.end) {
            // same start: the interval ending further right encloses the other
            // and therefore comes first; the shorter one comes after
            return end > s.end ? -1 : 1
        }
        return name.compareTo(s.name)
    }

    /** @return the structure formatted as name[start, end] */
    @Override
    public String toString() {
        return String.format("%s[%d, %d]", name, start, end)
    }

    /** Kept for callers of the original one-argument form; same text as s.toString(). */
    public toString(Struct s) {
        s.toString()
    }

    /** Prints the name[start, end] form of the given structure. */
    public print(Struct s) {
        print(s.toString())
    }
}
|
393 |
|
|
394 |
// Now build the TreeSet of corpus structures intervals |
|
395 |
|
|
396 |
// Sorted set of every occurrence of the requested structures; iterating over it
// follows the order defined by Struct.compareTo
def h = new TreeSet<Struct>()

struct_names.each { structName ->
    def attribute = "${corpusName}.${structName}"
    def occurrenceCount = corpusEngine.attributeSize(attribute)
    // exclusive range: iterates zero times when the structure has no occurrence
    // (0..occurrenceCount-1 would be the reverse range [0, -1] in that case)
    for (i in 0..<occurrenceCount) {
        def (start, end) = corpusEngine.struc2Cpos(attribute, i)
        //println sprintf("Adding %s[%d, %d]", structName, start, end)
        h.add(new Struct(structName, start, end))
    }
}
|
405 |
|
|
406 |
// function to print the hierarchical index of a query |
|
407 |
// Prints the values of property p for the matches of query q in corpus c, with
// their frequencies, by decreasing frequency.
//   c   : corpus name
//   q   : CQL query
//   p   : word property whose values are counted
//   cut : when > 0, only the first 'cut' entries are printed; otherwise all of them
def print_index = { c, q, p, cut ->

    def subcorpus = "${c}:RES1"
    corpusEngine.cqpQuery(c, "RES1", q)
    try {
        def matchCount = corpusEngine.subCorpusSize(subcorpus)
        def values = []
        // only dump the matches when there are some: the last index is matchCount-1
        if (matchCount > 0) {
            def positions = corpusEngine.dumpSubCorpus(subcorpus, CQI_CONST_FIELD_MATCH, 0, matchCount - 1)
            values = corpusEngine.cpos2Str("${c}.${p}", positions)
        }
        def ranked = values.countBy { it }.sort { -it.value }
        println(cut > 0 ? ranked.take(cut) : ranked)
    } finally {
        // always release the temporary subcorpus, even when a request failed
        corpusEngine.dropSubCorpus(subcorpus)
    }
}
|
418 |
|
|
419 |
// function to print the statistics of an index of a query |
|
420 |
// Prints the end of a result row for query q in corpus c: t, v, fmin and fmax
// (tab-separated), followed when displayIndex is set by the most frequent
// values of property p, and terminates the line.
//   c : corpus name
//   q : CQL query
//   p : word property used for the vocabulary and the frequencies
// A query without any match prints 0 for the four figures.
def print_freq = { c, q, p ->

    def subcorpus = "${c}:RES1"
    def attribute = "${c}.${p}"

    // call the engine
    corpusEngine.cqpQuery(c, "RES1", q)
    try {
        // t: number of matches
        def tC = corpusEngine.subCorpusSize(subcorpus)

        // index: property value id -> frequency
        def index = [:]
        if (tC > 0) {
            def positions = corpusEngine.dumpSubCorpus(subcorpus, CQI_CONST_FIELD_MATCH, 0, tC - 1)
            // collect { it } turns the ids returned by the engine into a List before counting
            index = corpusEngine.cpos2Id(attribute, positions).collect { it }.countBy { it }
        }

        // keep only the frequencies
        def freqs = index.values()
        def v = freqs.size()
        // min()/max() return null on an empty collection
        def fmin = freqs.min() ?: 0
        def fmax = freqs.max() ?: 0
        //println sprintf("t %d, v %d, fmin %d, fmax %d", tC, v, fmin, fmax)
        print sprintf("%d\t%d\t%d\t%d", tC, v, fmin, fmax)

        // display the most frequent values of the property
        if (displayIndex) {
            def ranked = index.sort { -it.value }
            // Vmax is an optional parameter: list everything when it is not set
            def heads = (Vmax != null ? ranked.take(Vmax) : ranked).keySet()
            println "\t"+heads.collect { corpusEngine.id2Str(attribute, it)[0] }
        } else {
            println ""
        }
    } finally {
        // always release the temporary subcorpus, even when a request failed
        corpusEngine.dropSubCorpus(subcorpus)
    }
}
|
460 |
|
|
461 |
// Header row of the TSV table: the 'prop' column replaces 'start' and 'end'
// when structure properties were given
def headerColumns = propParam ? "struct\tprop\tt\tv\tfmin\tfmax" : "struct\tstart\tend\tt\tv\tfmin\tfmax"
print headerColumns

// The 'index' column is only present when the index display is requested
println(displayIndex ? "\tindex" : "")
|
472 |
|
|
473 |
// Output directory: <home>/Documents/d3test. HOME is used when it is set; the
// JVM "user.home" property is the fallback on systems that do not define it
def homeDir = System.getenv("HOME") ?: System.getProperty("user.home")
def localPath = homeDir+"/Documents/d3test"
new File(localPath).mkdirs()

// reset output file: create it if needed and empty it
def resultFile = new File(localPath, "desc-partition.html")
resultFile.text = ""
|
482 |
|
|
483 |
// Append the beginning of the HTML page to the result file: document head, CSS
// rules, page body and the start of the d3 script, up to the opening of the
// JavaScript 'tree' template literal.
// NOTE(review): nothing in the visible script appends the tree data or closes the
// template literal and the <script>/<body>/<html> elements afterwards, so the
// generated page looks incomplete - confirm.
resultFile << '''\ |

484 |
<!DOCTYPE html> |

485 |
<html> |

486 |
<head> |

487 |
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" charset="UTF-8"/> |

488 |
<link type="text/css" rel="stylesheet" href="style.css"/> |

489 |
<script type="text/javascript" src="d3/d3.v3.js" charset="utf-8"></script> |

490 |
<script type="text/javascript" src="d3/layout/partition.js" charset="utf-8"></script> |

491 |
<style type="text/css"> |

492 |


493 |
.chart { |

494 |
display: block; |

495 |
margin: auto; |

496 |
margin-top: 60px; |

497 |
font-size: 11px; |

498 |
} |

499 |


500 |
rect { |

501 |
stroke: #eee; |

502 |
fill: #aaa; |

503 |
fill-opacity: .8; |

504 |
} |

505 |


506 |
rect.parent { |

507 |
cursor: pointer; |

508 |
fill: steelblue; |

509 |
} |

510 |


511 |
text { |

512 |
pointer-events: none; |

513 |
} |

514 |


515 |
</style> |

516 |
</head> |

517 |
<body> |

518 |
<div id="body"> |

519 |
<div id="footer"> |

520 |
Structures hierarchy |

521 |
<div class="hint">click or shift-alt-click to zoom-in or out</div> |

522 |
</div> |

523 |
</div> |

524 |
<script type="text/javascript"> |

525 |


526 |
var w = 1120, |

527 |
h = 600, |

528 |
x = d3.scale.linear().range([0, w]), |

529 |
y = d3.scale.linear().range([0, h]); |

530 |


531 |
var vis = d3.select("#body").append("div") |

532 |
.attr("class", "chart") |

533 |
.style("width", w + "px") |

534 |
.style("height", h + "px") |

535 |
.append("svg:svg") |

536 |
.attr("width", w) |

537 |
.attr("height", h); |

538 |


539 |
var partition = d3.layout.partition() |

540 |
.value(function(d) { return d.size; }).sort(null); |

541 |


542 |
var tree = `{''' |
|
543 |
|
|
544 |
// Now iterate on the TreeSet to get a depth first search on the structure intervals |
|
545 |
|
|
546 |
// Pattern for structure names ending with digits; group 1 is the name without the digits
def rec_struct_regex = '([^0-9]+)[0-9]+'
|
547 |
|
|
548 |
/* |
|
549 |
"name": "sha-hamlet", |
|
550 |
"children": [ |
|
551 |
{ |
|
552 |
"name": "sha-hamcast", |
|
553 |
"children": [ |
|
554 |
{ |
|
555 |
"name": "sha-ham1", |
|
556 |
"children": [ |
|
557 |
{"name": "sha-ham102", "size": 855}, |
|
558 |
{"name": "sha-ham103", "size": 464}, |
|
559 |
{"name": "sha-ham104", "size": 296}, |
|
560 |
{"name": "sha-ham105", "size": 635} |
|
561 |
] |
|
562 |
} |
|
563 |
] |
|
564 |
} |
|
565 |
] |
|
566 |
}`; |
|
567 |
*/ |
|
568 |
|
|
569 |
// Debugging helper: prints the structure 'head' and every structure of the set h
// nested inside it, one per line as name[start, end], indented by nesting depth.
// Does nothing when head is null.
def displayTree = { head ->
    if (!head) {
        return
    }
    // structures currently open, innermost on top
    def enclosing = new ArrayDeque()
    // tailSet(head) starts at head itself and follows the depth-first order of h
    for (struct in h.tailSet(head)) {
        // the first structure starting after the end of head is outside its subtree
        if (struct.start > head.end) {
            break
        }
        // close the structures that end before this one starts
        while (!enclosing.isEmpty() && struct.start > enclosing.peek().end) {
            enclosing.pop()
        }
        println(("  " * enclosing.size()) + sprintf("%s[%d, %d]", struct.name, struct.start, struct.end))
        enclosing.push(struct)
    }
}
|
581 |
|
|
582 |
//displayTree(h.first()) |
|
583 |
|
|
584 |
|
|
585 |
// Print one row per structure occurrence, in the order of the set h: the
// structure name, then either its property value or its start and end
// positions, then the statistics printed by print_freq
h.each { struct ->
    //println sprintf("Displaying %s[%d, %d]", struct.name, struct.start, struct.end)
    if (propParam) {
        // when the name matches rec_struct_regex exactly once, the part before
        // the digits is used to build the property attribute name
        def recMatch = (struct.name =~ rec_struct_regex)
        def baseName = (recMatch.size() == 1) ? recMatch[0][1] : struct.name
        def propAttribute = "${corpusName}.${baseName}_${corpusStructProps[struct.name]}"
        // property value of the structure containing the first position of the interval
        def strucIds = corpusEngine.cpos2Struc(propAttribute, [struct.start] as int[])
        def propValue = corpusEngine.struc2Str(propAttribute, strucIds)[0]
        print sprintf("%s\t%s\t", struct.name, propValue)
    } else {
        print sprintf("%s\t%d\t%d\t", struct.name, struct.start, struct.end)
    }
    // restrict the word query to the positions of this structure
    print_freq(corpusName, sprintf("a:%s :: a>=%d & a<=%d", query, struct.start, struct.end), wordProperty)
}
|
604 |
|
Formats disponibles : Unified diff