Révision 3238
tmp/org.txm.groovy.core/src/groovy/org/txm/macro/projects/dth/AnnotateHobbesBiblicalReferencesMacro.groovy (revision 3238) | ||
---|---|---|
1 |
// Copyright © 2021 ENS de Lyon, CNRS, University of Franche-Comté |
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses) |
|
3 |
// @author sheiden |
|
4 |
// @author mdecorde |
|
5 |
|
|
6 |
// STANDARD DECLARATIONS |
|
7 |
package org.txm.macro.projets.dth |
|
8 |
|
|
9 |
import org.kohsuke.args4j.* |
|
10 |
import groovy.transform.Field |
|
11 |
import org.txm.rcp.swt.widget.parameters.* |
|
12 |
import org.txm.rcp.editors.concordances.* |
|
13 |
import org.txm.searchengine.cqp.corpus.CQPCorpus |
|
14 |
import org.txm.searchengine.cqp.corpus.MainCorpus |
|
15 |
import org.txm.functions.concordances.* |
|
16 |
import org.txm.annotation.urs.* |
|
17 |
import org.txm.concordance.core.functions.Concordance |
|
18 |
import org.txm.concordance.rcp.editors.ConcordanceEditor |
|
19 |
import visuAnalec.elements.Unite |
|
20 |
import visuAnalec.vue.Vue |
|
21 |
import org.txm.searchengine.cqp.CQPSearchEngine |
|
22 |
import org.apache.commons.lang.StringUtils |
|
23 |
import org.txm.macro.cqp.CQPUtils |
|
24 |
import org.txm.searchengine.cqp.ICqiClient |
|
25 |
import org.txm.utils.i18n.LangFormater |
|
26 |
|
|
27 |
// Script-level settings. The first three are only assigned here; nothing in
// the visible part of this script reads them.
move_start = 0
move_end = 0
create_only_if_new = true

// CQL query matching a biblical reference:
//   - an optional book number ("1." or "2"),
//   - the book name or abbreviation, marked as the match target (@),
//   - an optional token tagged pos="p",
//   - a first token starting with a digit,
//   - then any run of digit-initial tokens, "v..." tokens or punctuation,
//     except brackets, parentheses and colons.
query = /[word="[1-2]\.?"]? @[word="Mat\.|Acts|Gen\.|Cor\.|chap\.|Deut\.|Exod\.|Sam\.|Rom\.|Luke|Matth\.|Iohn|Joh\.|Kings|Act\.|John|Prov\.|Tim\.|Heb\.|Mark|Pet\.|Psal\.|Gal\.|Tit\.|Isaiah|Levit\.|Ezek\.|Math\.|Col\.|Math|Numb\.|Numbers|Zach\.|Chron\.|Eph\.|Epist\.|Isay|Luk\.|Marke|Dan\.|Ephes\.|Esay|Ioh\.|Isai|Jo\.|Job|Josh\.|Mar\.|Proverbs|Psalm|Vers\.|Colos\.|Ierem\.|Iosh\.|Jer\.|Jerem\.|Judges|Micah|Rev\.|Rom|Thess\.|Titus|Ac\.|Act|Apoc\.|Apocalypse|Baruch|Chro\.|Coll\.|Deuteronomy|Eccles\.|Ecclesiastes|Ecclus\.|Esdras|Exo\.|Exod|Ezektel|Gen|Haggai|Iames|Ier\.|Iob\.|Ioel|Ioshua|Isa|Isa\.|Iud\.|Iude|Iudg\.|Joel|Joshua|Jud\.|Judg\.|Matt\.|Matthew|Num\.|Numb|Psa\.|Ruth|Timothy|Zach"] [pos="p"]? [word="[0-9].*"] ([word="[0-9].*|v.*|\p{P}" & word!="\)|]|:|\("])*/

// Name of the CQP subcorpus that receives the query result
bibRefs = "BibRefs"
|
33 |
|
|
34 |
// Maps every book form matched by the CQL query to the full book name.
// Values written between slashes ("/chap./", "/Epist./", "/Vers./") mark
// matched forms that are not book names.
bibBooks = [
	"Ac.":"Acts",
	"Act":"Acts",
	"Act.":"Acts",
	"Acts":"Acts",
	"Apoc.":"Revelation",
	"Apocalypse":"Revelation",
	"Baruch":"Baruch",
	"chap.":"/chap./",
	"Chro.":"Chronicles",
	"Chron.":"Chronicles",
	"Col.":"Colossians",
	"Coll.":"Colossians",
	"Colos.":"Colossians",
	"Cor.":"Corinthians",
	"Dan.":"Daniel",
	"Deut.":"Deuteronomy",
	"Deuteronomy":"Deuteronomy",
	"Eccles.":"Ecclesiastes",
	"Ecclesiastes":"Ecclesiastes",
	"Ecclus.":"Ecclesiasticus",
	"Eph.":"Ephesians",
	"Ephes.":"Ephesians",
	"Epist.":"/Epist./",
	"Esay":"Isaiah",
	"Esdras":"Esdras",
	"Exo.":"Exodus",
	"Exod":"Exodus",
	"Exod.":"Exodus",
	"Ezek.":"Ezekiel",
	// form matched by the query; presumably a misprint of "Ezekiel" - TODO confirm
	"Ezektel":"Ezekiel",
	"Gal.":"Galatians",
	"Gen":"Genesis",
	"Gen.":"Genesis",
	"Haggai":"Haggai",
	"Heb.":"Hebrews",
	"Iames":"James",
	"Ier.":"Jeremiah",
	"Ierem.":"Jeremiah",
	// form matched by the query but previously missing from this table
	"Iob.":"Job",
	"Ioel":"Joel",
	"Ioh.":"John",
	"Iohn":"John",
	"Iosh.":"Joshua",
	"Ioshua":"Joshua",
	"Isa":"Isaiah",
	"Isa.":"Isaiah",
	"Isai":"Isaiah",
	"Isaiah":"Isaiah",
	"Isay":"Isaiah",
	"Iud.":"Jude",
	"Iude":"Jude",
	"Iudg.":"Judges",
	"Jer.":"Jeremiah",
	"Jerem.":"Jeremiah",
	// "Jo." used to be listed twice ("John", then "Job"); in a map literal the
	// last entry wins, so only the effective value is kept.
	// NOTE(review): "Jo." is ambiguous (John/Job) - confirm against the corpus.
	"Jo.":"Job",
	"Job":"Job",
	"Joel":"Joel",
	"Joh.":"John",
	"John":"John",
	"Josh.":"Joshua",
	"Joshua":"Joshua",
	"Jud.":"Jude",
	"Judg.":"Judges",
	"Judges":"Judges",
	"Kings":"Kings",
	"Levit.":"Leviticus",
	"Luk.":"Luke",
	"Luke":"Luke",
	"Mar.":"Mark",
	"Mark":"Mark",
	"Marke":"Mark",
	"Mat.":"Matthew",
	"Math":"Matthew",
	"Math.":"Matthew",
	"Matt.":"Matthew",
	"Matth.":"Matthew",
	"Matthew":"Matthew",
	"Micah":"Micah",
	"Num.":"Numbers",
	"Numb":"Numbers",
	"Numb.":"Numbers",
	"Numbers":"Numbers",
	"Pet.":"Peter",
	"Prov.":"Proverbs",
	"Proverbs":"Proverbs",
	"Psa.":"Psalms",
	"Psal.":"Psalms",
	"Psalm":"Psalms",
	"Rev.":"Revelation",
	"Rom":"Romans",
	"Rom.":"Romans",
	"Ruth":"Ruth",
	"Sam.":"Samuel",
	"Thess.":"Thessalonians",
	"Tim.":"Timothy",
	"Timothy":"Timothy",
	"Tit.":"Titus",
	"Titus":"Titus",
	"Vers.":"/Vers./",
	"Zach":"Zechariah",
	"Zach.":"Zechariah",
]
|
136 |
|
|
137 |
// Maps every book form matched by the CQL query to a normalized abbreviation.
// ScanChapsVers() uses this table to build the "book" property of the units.
// Values written between slashes ("/chap./", "/Epist./", "/Vers./") mark
// matched forms that are not book names.
bibAbbr = [
	"Ac.":"Acts",
	"Act":"Acts",
	"Act.":"Acts",
	"Acts":"Acts",
	"Apoc.":"Rev.",
	"Apocalypse":"Rev.",
	"Baruch":"Bar.",
	"chap.":"/chap./",
	"Chro.":"Chr.",
	"Chron.":"Chr.",
	"Col.":"Col.",
	"Coll.":"Col.",
	"Colos.":"Col.",
	"Cor.":"Cor.",
	"Dan.":"Dan.",
	"Deut.":"Deut.",
	"Deuteronomy":"Deut.",
	"Eccles.":"Eccles.",
	"Ecclesiastes":"Eccles.",
	"Ecclus.":"Ecclus.",
	"Eph.":"Eph.",
	"Ephes.":"Eph.",
	"Epist.":"/Epist./",
	"Esay":"Isa.",
	"Esdras":"Esdras",
	"Exo.":"Exod.",
	"Exod":"Exod.",
	"Exod.":"Exod.",
	"Ezek.":"Ezek.",
	// form matched by the query; presumably a misprint of "Ezekiel" - TODO confirm
	"Ezektel":"Ezek.",
	"Gal.":"Gal.",
	"Gen":"Gen.",
	"Gen.":"Gen.",
	"Haggai":"Hag.",
	"Heb.":"Heb.",
	"Iames":"Jas.",
	"Ier.":"Jer.",
	"Ierem.":"Jer.",
	// form matched by the query but previously missing from this table
	"Iob.":"Job",
	"Ioel":"Joel",
	"Ioh.":"John",
	"Iohn":"John",
	"Iosh.":"Josh.",
	"Ioshua":"Josh.",
	"Isa":"Isa.",
	"Isa.":"Isa.",
	"Isai":"Isa.",
	"Isaiah":"Isa.",
	"Isay":"Isa.",
	"Iud.":"Jude",
	"Iude":"Jude",
	"Iudg.":"Judg.",
	"Jer.":"Jer.",
	"Jerem.":"Jer.",
	// "Jo." used to be listed twice ("Kgs", then "Job"); in a map literal the
	// last entry wins, so only the effective value is kept.
	// NOTE(review): "Jo." is ambiguous (John/Job) - confirm against the corpus.
	"Jo.":"Job",
	"Job":"Job",
	"Joel":"Joel",
	"Joh.":"John",
	"John":"John",
	"Josh.":"Josh.",
	"Joshua":"Josh.",
	"Jud.":"Jude",
	"Judg.":"Judg.",
	"Judges":"Judg.",
	"Kings":"Kgs",
	"Levit.":"Lev.",
	"Luk.":"Luke",
	"Luke":"Luke",
	"Mar.":"Mark",
	"Mark":"Mark",
	"Marke":"Mark",
	"Mat.":"Matt.",
	"Math":"Matt.",
	"Math.":"Matt.",
	"Matt.":"Matt.",
	"Matth.":"Matt.",
	"Matthew":"Matt.",
	"Micah":"Mic.",
	"Num.":"Num.",
	"Numb":"Num.",
	"Numb.":"Num.",
	"Numbers":"Num.",
	"Pet.":"Pet.",
	"Prov.":"Prov.",
	"Proverbs":"Prov.",
	"Psa.":"Ps.",
	"Psal.":"Ps.",
	"Psalm":"Ps.",
	"Rev.":"Rev.",
	"Rom":"Rom.",
	"Rom.":"Rom.",
	"Ruth":"Ruth",
	"Sam.":"Sam.",
	"Thess.":"Thess.",
	"Tim.":"Tim.",
	"Timothy":"Tim.",
	"Tit.":"Titus",
	"Titus":"Titus",
	"Vers.":"/Vers./",
	"Zach":"Zech.",
	"Zach.":"Zech.",
]
|
239 |
|
|
240 |
/* |
|
241 |
println "bibAbbr lengths :" |
|
242 |
println "len\tF" |
|
243 |
bibAbbr.collect { it.value }.sort().unique().collect { it.length() }.sort().countBy { it }.each { println sprintf("%1d\t%2d", it.key, it.value) } |
|
244 |
(5..8).each { max -> |
|
245 |
abbrs = bibAbbr.collect { it.value }.sort().unique().findAll { it.length() == max } |
|
246 |
println max+" : "+abbrs |
|
247 |
} |
|
248 |
*/ |
|
249 |
|
|
250 |
// check for a corpus selection |
|
251 |
// Resolve the corpus to annotate: exactly one corpus must be selected in the
// Corpus view (or given by name).
utils = new CQPUtils()
corpusEngine = CQPSearchEngine.getCqiClient()

corpora = utils.getCorpora(this)
scriptName = this.class.getSimpleName()

// An empty selection has to abort as well: corpora[0] would be null and the
// getMainCorpus() call below would fail with a NullPointerException.
if (corpora == null || corpora.size() != 1) {
	println "** $scriptName: please select a corpus in the Corpus view or provide a corpus name. Aborting."
	return false
}

// Annotations are attached to the main corpus of the selection
corpus = corpora[0].getMainCorpus()
corpusName = corpus.getName()
wordProperty = corpus.getProperty("word")

// The URS annotation structure must be ready before units can be created
if (!URSCorpora.isAnnotationStructureReady(corpus)) {
	println "** URS Annotation Structure of "+corpusName+" is not ready. Aborting."
	return
}
|
270 |
|
|
271 |
// check for corpus annotation structure unit types |
|
272 |
def analecCorpus = URSCorpora.getCorpus(corpus)

// Unit type for composite references; declared together with its properties
// only when the annotation structure does not know the type yet.
def crType = "Composite Biblical Reference"
if (!analecCorpus.getStructure().getUnites().contains(crType)) {
	analecCorpus.getStructure().ajouterType(Unite.class, crType)
	[
		"reference_form",
		"reference_id",
		"number",
		"book",
		"chapters_verses_form",
		"books_chapters_verses_list"
	].each { prop ->
		analecCorpus.getStructure().ajouterProp(Unite.class, crType, prop)
	}
}

// Unit type for a single reference (one book/chapter/verse triple)
def rType = "Biblical Reference"
if (!analecCorpus.getStructure().getUnites().contains(rType)) {
	analecCorpus.getStructure().ajouterType(Unite.class, rType)
	[
		"number",
		"book",
		"chapter",
		"verse",
		"raw_verses",
		"cr_id",
		"type"
	].each { prop ->
		analecCorpus.getStructure().ajouterProp(Unite.class, rType, prop)
	}
}

// Make sure the input view lists both unit types and their properties
def vue = URSCorpora.getVue(corpus)
if (!vue.getTypesAVoir(Unite.class).contains(crType)) {
	vue.ajouterType(Unite.class, crType)
	[
		"reference_id",
		"reference_form",
		"books_chapters_verses_list",
		"chapters_verses_form",
		"book",
		"number"
	].each { prop ->
		vue.ajouterProp(Unite.class, crType, prop)
	}
}

if (!vue.getTypesAVoir(Unite.class).contains(rType)) {
	vue.ajouterType(Unite.class, rType)
	[
		"type",
		"cr_id",
		"raw_verses",
		"verse",
		"chapter",
		"book",
		"number"
	].each { prop ->
		vue.ajouterProp(Unite.class, rType, prop)
	}
}
|
321 |
|
|
322 |
// manage cqp matching strategy |
|
323 |
// Remember the matching strategy recorded in the system properties so it can
// be put back once the query has been run; the query needs 'longest'.
cqp_matching_strategy = System.getProperty("cqp_matching_strategy")

if (cqp_matching_strategy != 'longest') {
	println "Changing MatchingStrategy from '"+cqp_matching_strategy+"' to 'longest'"
	corpusEngine.query("set MatchingStrategy longest;")
	System.setProperty("cqp_matching_strategy", 'longest')
}

// Run the query; the result is stored in the subcorpus named by bibRefs
corpusEngine.cqpQuery(corpusName, bibRefs, query)
nmatches = corpusEngine.subCorpusSize("$corpusName:$bibRefs")

if (nmatches == 0) {
	println "** No references found. Aborting."

	// Do not leave CQP and the system property on 'longest' when aborting.
	if (cqp_matching_strategy != 'longest') {
		if (cqp_matching_strategy != null) {
			println "Changing MatchingStrategy back to '"+cqp_matching_strategy+"'"
			corpusEngine.query("set MatchingStrategy "+cqp_matching_strategy+";")
			System.setProperty("cqp_matching_strategy", cqp_matching_strategy)
		} else {
			// The property was not set before: fall back to CQP's documented
			// default. NOTE(review): assumes the engine was on 'standard' - confirm.
			println "Changing MatchingStrategy back to 'standard'"
			corpusEngine.query("set MatchingStrategy standard;")
			System.clearProperty("cqp_matching_strategy")
		}
	}
	return
}

println "Found "+nmatches+" biblical references."

// Start from a clean slate: every existing unit of this type is deleted
// (toArray() makes a copy so the unit list is not modified while iterating).
println "Removing all <"+rType+"> units..."
analecCorpus.getUnites(rType).toArray(new Unite[0]).each { analecCorpus.supUnite(it) }

// Number of units created by this run
nRUnits = 0
|
345 |
|
|
346 |
/**
 * Turns the tokens following a book name into a list of
 * [bookName, chapter, verse] triples.
 *
 * The first number is read as a chapter. After that, a number greater than
 * the last verse seen is a new verse of the current chapter; a number that
 * is not greater starts a new chapter. Once a token starting with "v"
 * ("v.", "ver.", "vers.") has been seen, every following number is a verse
 * of the current chapter. A trailing '.' is stripped from tokens, and the
 * separators "," "&" "." "[" are skipped.
 *
 * Examples (tokens -> chapter.verse):
 *   15. vers. 22 , 23 , 24.   -> 15.22, 15.23, 15.24
 *   9. 9.                     -> 9.9
 *   2. 13. 14 .               -> 2.13, 2.14
 *   4. 41. 5. 26.             -> 4.41, 5.26
 *   15.                       -> 15 (verse 0)
 *
 * Tokens that are neither integers nor verse markers (other punctuation,
 * forms such as "12a") are skipped with a warning rather than aborting the
 * whole macro with a NumberFormatException.
 *
 * @param book      book form as found in the corpus; looked up in bibAbbr
 * @param num       book number ("" when there is none); prefixed to the book name
 * @param chapsVers word forms following the book name
 * @return list of [bookName, chapter, verse]; when the scan ends with no verse
 *         recorded for the current chapter, a [bookName, chapter, 0] entry is appended
 */
def ScanChapsVers (book, num, chapsVers) {

	def lookingForChap = true
	def restIsVerse = false
	def CVList = []
	def maxVerse = 0
	def currentChap = 0
	def bookName

	if (num.length() > 0) {
		num += " "
	}

	if (bibAbbr.containsKey(book)) {
		bookName = num+bibAbbr[book]
	} else {
		println "** unknown book name: "+book
		bookName = num+"?"+book+"?"
	}

	chapsVers.each { form ->

		// list separators carry no information
		if (form == "," || form == "&" || form == "." || form == "[") return

		// remove trailing '.'
		def token = form.endsWith(".") ? form.substring(0, form.length()-1) : form

		if (token.isInteger()) {
			def n = token.toInteger()
			if (restIsVerse) {
				maxVerse = n
				CVList << [bookName, currentChap, n]
			} else if (lookingForChap) {
				currentChap = n
				lookingForChap = false
			} else if (n > maxVerse) {
				// increasing number: next verse of the current chapter
				maxVerse = n
				CVList << [bookName, currentChap, n]
			} else {
				// number not increasing: a new chapter starts
				currentChap = n
				maxVerse = 0
			}
		} else if (restIsVerse) {
			// after a verse marker, non-numeric tokens are ignored
		} else if (!lookingForChap && token ==~ /v.*/) {
			restIsVerse = true
		} else {
			println "** ignoring unexpected token '"+form+"' in reference to "+bookName
		}
	}

	// chapter without any verse
	if (maxVerse == 0) {
		CVList << [bookName, currentChap, 0]
	}

	return CVList
}
|
418 |
|
|
419 |
// Corpus positions of every match: first token, target token (the book name,
// marked with @ in the query) and last token.
def starts = corpusEngine.dumpSubCorpus("$corpusName:$bibRefs", ICqiClient.CQI_CONST_FIELD_MATCH, 0, nmatches-1)
def targets = corpusEngine.dumpSubCorpus("$corpusName:$bibRefs", ICqiClient.CQI_CONST_FIELD_TARGET, 0, nmatches-1)
def ends = corpusEngine.dumpSubCorpus("$corpusName:$bibRefs", ICqiClient.CQI_CONST_FIELD_MATCHEND, 0, nmatches-1)

// Create one <Biblical Reference> unit per [book, chapter, verse] triple found
// in each match; all units of a match span the whole match.
[starts, targets, ends].transpose().each { start, target, end ->

	iUnit = 0

	// A match starting before its target begins with a book number ("1.", "2").
	def hasNumber = start < target
	def number = ""
	if (hasNumber) {
		number = corpusEngine.cpos2Str("$corpusName.$wordProperty", (start..(target-1)) as int[]).join(' ')
		// remove trailing '.'
		if (number.endsWith('.')) {
			number = number.substring(0, number.length()-1)
		}
	}

	// Without a number the book is the first token of the match
	def bookPos = hasNumber ? target : start
	def book = corpusEngine.cpos2Str("$corpusName.$wordProperty", (bookPos..bookPos) as int[]).join(' ')

	// Everything after the book name: chapters and verses
	def chapsVersFormsList = corpusEngine.cpos2Str("$corpusName.$wordProperty", ((bookPos+1)..end) as int[]) as String[]
	def chapsVersForms = chapsVersFormsList.join(' ')
	def chapsVersList = ScanChapsVers(book, number, chapsVersFormsList)

	// Same value for every unit of this match: computed once
	def rawVerses = LangFormater.format(chapsVersForms, corpus.getLang())

	chapsVersList.reverse().each { b, c, v ->

		def properties = [:]
		properties["number"] = number
		properties["book"] = b
		properties["chapter"] = c as String
		properties["verse"] = v as String
		properties["raw_verses"] = rawVerses
		properties["cr_id"] = 0 as String
		properties["type"] = "word"

		analecCorpus.addUniteSaisie(rType, start, end, properties)
		nRUnits++
	}
}

// Put the matching strategy back the way it was found.
if (cqp_matching_strategy != 'longest') {
	if (cqp_matching_strategy != null) {
		println "Changing MatchingStrategy back to '"+cqp_matching_strategy+"'"
		corpusEngine.query("set MatchingStrategy "+cqp_matching_strategy+";")
		System.setProperty("cqp_matching_strategy", cqp_matching_strategy)
	} else {
		// The property was not set before the macro ran. System.setProperty()
		// rejects a null value, which used to abort the macro before the units
		// were saved. Fall back to CQP's documented default instead.
		// NOTE(review): assumes the engine was on 'standard' - confirm.
		println "Changing MatchingStrategy back to 'standard'"
		corpusEngine.query("set MatchingStrategy standard;")
		System.clearProperty("cqp_matching_strategy")
	}
}

println "$nRUnits <$rType> units created."

print "Saving units... "
URSCorpora.saveCorpus(analecCorpus)
println "Done."

if (nRUnits > 0) corpus.setIsModified(true)
|
501 |
|
Formats disponibles : Unified diff