|
1 |
// Copyright © 2021 ENS de Lyon, CNRS, University of Franche-Comté
|
|
2 |
// Licensed under the terms of the GNU General Public License (http://www.gnu.org/licenses)
|
|
3 |
// @author sheiden
|
|
4 |
// @author mdecorde
|
|
5 |
|
|
6 |
// STANDARD DECLARATIONS
|
|
7 |
package org.txm.macro.projets.dth
|
|
8 |
|
|
9 |
import org.kohsuke.args4j.*
|
|
10 |
import groovy.transform.Field
|
|
11 |
import org.txm.rcp.swt.widget.parameters.*
|
|
12 |
import org.txm.rcp.editors.concordances.*
|
|
13 |
import org.txm.searchengine.cqp.corpus.CQPCorpus
|
|
14 |
import org.txm.searchengine.cqp.corpus.MainCorpus
|
|
15 |
import org.txm.functions.concordances.*
|
|
16 |
import org.txm.annotation.urs.*
|
|
17 |
import org.txm.concordance.core.functions.Concordance
|
|
18 |
import org.txm.concordance.rcp.editors.ConcordanceEditor
|
|
19 |
import visuAnalec.elements.Unite
|
|
20 |
import visuAnalec.vue.Vue
|
|
21 |
import org.txm.searchengine.cqp.CQPSearchEngine
|
|
22 |
import org.apache.commons.lang.StringUtils
|
|
23 |
import org.txm.macro.cqp.CQPUtils
|
|
24 |
import org.txm.searchengine.cqp.ICqiClient
|
|
25 |
import org.txm.utils.i18n.LangFormater
|
|
26 |
|
|
27 |
// Script-level settings (stored in the script binding).
// NOTE(review): move_start, move_end and create_only_if_new are not read anywhere
// in the visible part of this script -- confirm before removing them.
move_start = 0
move_end = 0
create_only_if_new = true

// Biblical References CQL Query
// Shape of a match:
//   optional book number ("1", "2.")  -> tokens before the target
//   @ book name or abbreviation       -> the target token
//   optional [pos="p"] token
//   a token starting with a digit     -> first chapter
//   any run of numbers, "v..." verse markers and punctuation, excluding ) ] : (
query = /[word="[1-2]\.?"]? @[word="Mat\.|Acts|Gen\.|Cor\.|chap\.|Deut\.|Exod\.|Sam\.|Rom\.|Luke|Matth\.|Iohn|Joh\.|Kings|Act\.|John|Prov\.|Tim\.|Heb\.|Mark|Pet\.|Psal\.|Gal\.|Tit\.|Isaiah|Levit\.|Ezek\.|Math\.|Col\.|Math|Numb\.|Numbers|Zach\.|Chron\.|Eph\.|Epist\.|Isay|Luk\.|Marke|Dan\.|Ephes\.|Esay|Ioh\.|Isai|Jo\.|Job|Josh\.|Mar\.|Proverbs|Psalm|Vers\.|Colos\.|Ierem\.|Iosh\.|Jer\.|Jerem\.|Judges|Micah|Rev\.|Rom|Thess\.|Titus|Ac\.|Act|Apoc\.|Apocalypse|Baruch|Chro\.|Coll\.|Deuteronomy|Eccles\.|Ecclesiastes|Ecclus\.|Esdras|Exo\.|Exod|Ezektel|Gen|Haggai|Iames|Ier\.|Iob\.|Ioel|Ioshua|Isa|Isa\.|Iud\.|Iude|Iudg\.|Joel|Joshua|Jud\.|Judg\.|Matt\.|Matthew|Num\.|Numb|Psa\.|Ruth|Timothy|Zach"] [pos="p"]? [word="[0-9].*"] ([word="[0-9].*|v.*|\p{P}" & word!="\)|]|:|\("])*/

// Name of the CQP subcorpus that receives the query result
bibRefs = "BibRefs"
|
|
33 |
|
|
34 |
// Maps each book form matched by the CQL query to a full book name.
// Values wrapped in slashes ("/chap./", "/Epist./", "/Vers./") mark forms that
// are matched by the query but are not book names.
// Fixes over the previous version:
//  - "Jo." was declared twice ("John" then "Job"); in a Groovy map literal the
//    last entry wins, so the effective value was already "Job". The shadowed
//    entry is removed. NOTE(review): "Jo." is ambiguous -- confirm "Job" is intended.
//  - "Iob." and "Ezektel" are matched by the query but had no entry here.
bibBooks = [
    "Ac.":"Acts",
    "Act":"Acts",
    "Act.":"Acts",
    "Acts":"Acts",
    "Apoc.":"Revelation",
    "Apocalypse":"Revelation",
    "Baruch":"Baruch",
    "chap.":"/chap./",
    "Chro.":"Chronicles",
    "Chron.":"Chronicles",
    "Col.":"Colossians",
    "Coll.":"Colossians",
    "Colos.":"Colossians",
    "Cor.":"Corinthians",
    "Dan.":"Daniel",
    "Deut.":"Deuteronomy",
    "Deuteronomy":"Deuteronomy",
    "Eccles.":"Ecclesiastes",
    "Ecclesiastes":"Ecclesiastes",
    "Ecclus.":"Ecclesiasticus",
    "Eph.":"Ephesians",
    "Ephes.":"Ephesians",
    "Epist.":"/Epist./",
    "Esay":"Isaiah",
    "Esdras":"Esdras",
    "Exo.":"Exodus",
    "Exod":"Exodus",
    "Exod.":"Exodus",
    "Ezek.":"Ezekiel",
    "Ezektel":"Ezekiel",
    "Gal.":"Galatians",
    "Gen":"Genesis",
    "Gen.":"Genesis",
    "Haggai":"Haggai",
    "Heb.":"Hebrews",
    "Iames":"James",
    "Ier.":"Jeremiah",
    "Ierem.":"Jeremiah",
    "Iob.":"Job",
    "Ioel":"Joel",
    "Ioh.":"John",
    "Iohn":"John",
    "Iosh.":"Joshua",
    "Ioshua":"Joshua",
    "Isa":"Isaiah",
    "Isa.":"Isaiah",
    "Isai":"Isaiah",
    "Isaiah":"Isaiah",
    "Isay":"Isaiah",
    "Iud.":"Jude",
    "Iude":"Jude",
    "Iudg.":"Judges",
    "Jer.":"Jeremiah",
    "Jerem.":"Jeremiah",
    "Jo.":"Job",
    "Job":"Job",
    "Joel":"Joel",
    "Joh.":"John",
    "John":"John",
    "Josh.":"Joshua",
    "Joshua":"Joshua",
    "Jud.":"Jude",
    "Judg.":"Judges",
    "Judges":"Judges",
    "Kings":"Kings",
    "Levit.":"Leviticus",
    "Luk.":"Luke",
    "Luke":"Luke",
    "Mar.":"Mark",
    "Mark":"Mark",
    "Marke":"Mark",
    "Mat.":"Matthew",
    "Math":"Matthew",
    "Math.":"Matthew",
    "Matt.":"Matthew",
    "Matth.":"Matthew",
    "Matthew":"Matthew",
    "Micah":"Micah",
    "Num.":"Numbers",
    "Numb":"Numbers",
    "Numb.":"Numbers",
    "Numbers":"Numbers",
    "Pet.":"Peter",
    "Prov.":"Proverbs",
    "Proverbs":"Proverbs",
    "Psa.":"Psalms",
    "Psal.":"Psalms",
    "Psalm":"Psalms",
    "Rev.":"Revelation",
    "Rom":"Romans",
    "Rom.":"Romans",
    "Ruth":"Ruth",
    "Sam.":"Samuel",
    "Thess.":"Thessalonians",
    "Tim.":"Timothy",
    "Timothy":"Timothy",
    "Tit.":"Titus",
    "Titus":"Titus",
    "Vers.":"/Vers./",
    "Zach":"Zechariah",
    "Zach.":"Zechariah",
]
|
|
136 |
|
|
137 |
// Maps each book form matched by the CQL query to a normalized book abbreviation.
// ScanChapsVers looks book forms up in this map to build the "book" property.
// Values wrapped in slashes ("/chap./", "/Epist./", "/Vers./") mark forms that
// are matched by the query but are not book names.
// Fixes over the previous version:
//  - "Jo." was declared twice ("Kgs" then "Job"); in a Groovy map literal the
//    last entry wins, so the effective value was already "Job". The shadowed
//    (and wrong) "Kgs" entry is removed.
//    NOTE(review): "Jo." is ambiguous -- confirm "Job" is intended.
//  - "Iob." and "Ezektel" are matched by the query but had no entry here, so
//    they were reported as unknown books.
bibAbbr = [
    "Ac.":"Acts",
    "Act":"Acts",
    "Act.":"Acts",
    "Acts":"Acts",
    "Apoc.":"Rev.",
    "Apocalypse":"Rev.",
    "Baruch":"Bar.",
    "chap.":"/chap./",
    "Chro.":"Chr.",
    "Chron.":"Chr.",
    "Col.":"Col.",
    "Coll.":"Col.",
    "Colos.":"Col.",
    "Cor.":"Cor.",
    "Dan.":"Dan.",
    "Deut.":"Deut.",
    "Deuteronomy":"Deut.",
    "Eccles.":"Eccles.",
    "Ecclesiastes":"Eccles.",
    "Ecclus.":"Ecclus.",
    "Eph.":"Eph.",
    "Ephes.":"Eph.",
    "Epist.":"/Epist./",
    "Esay":"Isa.",
    "Esdras":"Esdras",
    "Exo.":"Exod.",
    "Exod":"Exod.",
    "Exod.":"Exod.",
    "Ezek.":"Ezek.",
    "Ezektel":"Ezek.",
    "Gal.":"Gal.",
    "Gen":"Gen.",
    "Gen.":"Gen.",
    "Haggai":"Hag.",
    "Heb.":"Heb.",
    "Iames":"Jas.",
    "Ier.":"Jer.",
    "Ierem.":"Jer.",
    "Iob.":"Job",
    "Ioel":"Joel",
    "Ioh.":"John",
    "Iohn":"John",
    "Iosh.":"Josh.",
    "Ioshua":"Josh.",
    "Isa":"Isa.",
    "Isa.":"Isa.",
    "Isai":"Isa.",
    "Isaiah":"Isa.",
    "Isay":"Isa.",
    "Iud.":"Jude",
    "Iude":"Jude",
    "Iudg.":"Judg.",
    "Jer.":"Jer.",
    "Jerem.":"Jer.",
    "Jo.":"Job",
    "Job":"Job",
    "Joel":"Joel",
    "Joh.":"John",
    "John":"John",
    "Josh.":"Josh.",
    "Joshua":"Josh.",
    "Jud.":"Jude",
    "Judg.":"Judg.",
    "Judges":"Judg.",
    "Kings":"Kgs",
    "Levit.":"Lev.",
    "Luk.":"Luke",
    "Luke":"Luke",
    "Mar.":"Mark",
    "Mark":"Mark",
    "Marke":"Mark",
    "Mat.":"Matt.",
    "Math":"Matt.",
    "Math.":"Matt.",
    "Matt.":"Matt.",
    "Matth.":"Matt.",
    "Matthew":"Matt.",
    "Micah":"Mic.",
    "Num.":"Num.",
    "Numb":"Num.",
    "Numb.":"Num.",
    "Numbers":"Num.",
    "Pet.":"Pet.",
    "Prov.":"Prov.",
    "Proverbs":"Prov.",
    "Psa.":"Ps.",
    "Psal.":"Ps.",
    "Psalm":"Ps.",
    "Rev.":"Rev.",
    "Rom":"Rom.",
    "Rom.":"Rom.",
    "Ruth":"Ruth",
    "Sam.":"Sam.",
    "Thess.":"Thess.",
    "Tim.":"Tim.",
    "Timothy":"Tim.",
    "Tit.":"Titus",
    "Titus":"Titus",
    "Vers.":"/Vers./",
    "Zach":"Zech.",
    "Zach.":"Zech.",
]
|
|
239 |
|
|
240 |
/*
|
|
241 |
println "bibAbbr lengths :"
|
|
242 |
println "len\tF"
|
|
243 |
bibAbbr.collect { it.value }.sort().unique().collect { it.length() }.sort().countBy { it }.each { println sprintf("%1d\t%2d", it.key, it.value) }
|
|
244 |
(5..8).each { max ->
|
|
245 |
abbrs = bibAbbr.collect { it.value }.sort().unique().findAll { it.length() == max }
|
|
246 |
println max+" : "+abbrs
|
|
247 |
}
|
|
248 |
*/
|
|
249 |
|
|
250 |
// check for a corpus selection
|
|
251 |
// Resolve the single corpus this macro works on.
utils = new CQPUtils()
corpusEngine = CQPSearchEngine.getCqiClient()

corpora = utils.getCorpora(this)
scriptName = this.class.getSimpleName()

// Exactly one corpus is required. The previous test (size() > 1) let an empty
// selection through, which then failed on corpora[0].getMainCorpus().
if ((corpora == null) || corpora.size() != 1) {
    println "** $scriptName: please select a corpus in the Corpus view or provide a corpus name. Aborting."
    return false
}

// Annotations are attached to the main corpus of the selection
corpus = corpora[0].getMainCorpus()
corpusName = corpus.getName()
wordProperty = corpus.getProperty("word")

// The URS annotation structure must be available before units can be created
if (!URSCorpora.isAnnotationStructureReady(corpus)) {
    println "** URS Annotation Structure of "+corpusName+" is not ready. Aborting."
    return
}
|
|
270 |
|
|
271 |
// check for corpus annotation structure unit types
|
|
272 |
def analecCorpus = URSCorpora.getCorpus(corpus)

// Composite Biblical Reference : number, book, chapters_verses_list, chapters_verses_form
def crType = "Composite Biblical Reference"
// Biblical Reference : number, book, chapter, verse
def rType = "Biblical Reference"

// Properties declared in the annotation structure for each unit type,
// in declaration order.
def structureProps = [
    (crType): ["reference_form", "reference_id", "number", "book", "chapters_verses_form", "books_chapters_verses_list"],
    (rType) : ["number", "book", "chapter", "verse", "raw_verses", "cr_id", "type"],
]

// A unit type is only declared (with its properties) when the structure does not know it yet
for (declaration in structureProps) {
    def unitType = declaration.key
    if (analecCorpus.getStructure().getUnites().contains(unitType)) {
        continue
    }
    analecCorpus.getStructure().ajouterType(Unite.class, unitType)
    for (propName in declaration.value) {
        analecCorpus.getStructure().ajouterProp(Unite.class, unitType, propName)
    }
}

// check for corpus annotation structure input form
def vue = URSCorpora.getVue(corpus)

// Properties shown in the input form for each unit type, in display-declaration order
def viewProps = [
    (crType): ["reference_id", "reference_form", "books_chapters_verses_list", "chapters_verses_form", "book", "number"],
    (rType) : ["type", "cr_id", "raw_verses", "verse", "chapter", "book", "number"],
]

// Same rule for the view: only add a type (and its properties) that is not displayed yet
for (declaration in viewProps) {
    def unitType = declaration.key
    if (vue.getTypesAVoir(Unite.class).contains(unitType)) {
        continue
    }
    vue.ajouterType(Unite.class, unitType)
    for (propName in declaration.value) {
        vue.ajouterProp(Unite.class, unitType, propName)
    }
}
|
|
321 |
|
|
322 |
// manage cqp matching strategy
|
|
323 |
// Remember the matching strategy recorded in the system property so that it
// can be compared again at the end of the script.
cqp_matching_strategy = System.getProperty("cqp_matching_strategy")

// The reference query is run with the 'longest' matching strategy
if (cqp_matching_strategy != 'longest') {
    println "Changing MatchingStrategy from '"+cqp_matching_strategy+"' to 'longest'"
    corpusEngine.query("set MatchingStrategy longest;")
    System.setProperty("cqp_matching_strategy", 'longest')
}

// Run the query into the named subcorpus and count its matches
corpusEngine.cqpQuery(corpusName, bibRefs, query)
nmatches = corpusEngine.subCorpusSize("$corpusName:$bibRefs")

if (0 == nmatches) {
    println "** No references found. Aborting."
    return
}

println "Found ${nmatches} biblical references."

// Existing units of this type are deleted before new ones are created.
// The units are copied to an array first, then removed one by one.
println "Removing all <${rType}> units..."
for (Unite staleUnit : analecCorpus.getUnites(rType).toArray(new Unite[0])) {
    analecCorpus.supUnite(staleUnit)
}

// Number of units created by this run
nRUnits = 0
|
|
345 |
|
|
346 |
/**
 * Expands the chapter/verse tokens that follow a book name into a list of
 * [bookName, chapter, verse] triples.
 *
 * bookName is num + " " + the normalized abbreviation found in bibAbbr, or
 * num + "?" + book + "?" when the book form is not a key of bibAbbr.
 *
 * Heuristic (tokens are read left to right, a trailing '.' is dropped):
 *  - the first number is the current chapter;
 *  - a token starting with 'v' (v., ver., vers.) means every following number is a verse;
 *  - otherwise a number greater than the last verse seen is a new verse of the
 *    current chapter, and any other number starts a new chapter;
 *  - a chapter left without verse at the end is emitted with verse 0.
 *
 * Examples (token sequence -> chapter.verse list):
 *   15. vers. 22 , 23 , 24.        -> 15.22, 15.23, 15.24
 *   9. 9.                          -> 9.9
 *   2. 13. 14 .                    -> 2.13, 2.14
 *   4. 41. 5. 26.                  -> 4.41, 5.26
 *   9. 13. 9. 21. 10. 12. 12. 19.  -> 9.13, 9.21, 10.12, 12.19
 *   3. 11 8.                       -> 3.11, 8
 *   5. 3 , 4 , & 5.                -> 5.3, 5.4, 5.5
 *   15.                            -> 15
 * Limitation: a number that is not greater than the previous verse is always
 * read as a new chapter, so a trailing lone number yields an extra chapter.
 *
 * Tokens that are neither separators, verse markers nor integers (for example
 * ";" or "12a") are reported and skipped; they used to abort the whole macro
 * with a NumberFormatException.
 *
 * @param book      book form as found in the corpus (looked up in bibAbbr)
 * @param num       book number form, "" when the reference has none
 * @param chapsVers word forms following the book name
 * @return list of [bookName, chapter, verse] lists
 */
def ScanChapsVers (book, num, chapsVers) {

    def lookingForChap = true
    def restIsVerse = false
    def CVList = []
    def maxVerse = 0
    def currentChap = 0
    def bookName

    if (num.length() > 0) {
        num += " "
    }

    if (bibAbbr.containsKey(book)) {
        bookName = num+bibAbbr[book]
    } else {
        println "** unkown book name: "+book
        bookName = num+"?"+book+"?"
    }

    // Converts a token to an Integer, or reports it and returns null when it is not one
    def toNumber = { String token ->
        if (token.isInteger()) {
            return token as Integer
        }
        println "** "+bookName+": ignoring unexpected chapter/verse token '"+token+"'"
        return null
    }

    for (form in chapsVers) {

        // separators carry no chapter/verse information
        if (form == null || form in [",", "&", ".", "["]) {
            continue
        }

        // remove trailing '.'
        def token = (form ==~ /.*\./) ? form.substring(0, form.length()-1) : form

        if (restIsVerse) {
            // after a verse marker, only tokens starting with a digit are considered
            if (token ==~ /[0-9].*/) {
                def verse = toNumber(token)
                if (verse != null) {
                    maxVerse = verse
                    CVList << [bookName, currentChap, verse]
                }
            }
            continue
        }

        // a verse marker is only meaningful once a chapter has been read
        if (!lookingForChap && token ==~ /v.*/) {
            restIsVerse = true
            continue
        }

        def value = toNumber(token)
        if (value == null) {
            continue
        }

        if (lookingForChap) {
            currentChap = value
            lookingForChap = false
        } else if (value > maxVerse) {
            // increasing number: next verse of the current chapter
            maxVerse = value
            CVList << [bookName, currentChap, value]
        } else {
            // number did not increase: it opens a new chapter
            currentChap = value
            maxVerse = 0
        }
    }

    // the last chapter had no verse: emit it alone, with verse 0
    if (maxVerse == 0) {
        CVList << [bookName, currentChap, 0]
    }

    return CVList
}
|
|
418 |
|
|
419 |
// Corpus positions of every match: first token, target token (the book form,
// marked with @ in the query) and last token.
def starts = corpusEngine.dumpSubCorpus("$corpusName:$bibRefs", ICqiClient.CQI_CONST_FIELD_MATCH, 0, nmatches-1)
def targets = corpusEngine.dumpSubCorpus("$corpusName:$bibRefs", ICqiClient.CQI_CONST_FIELD_TARGET, 0, nmatches-1)
def ends = corpusEngine.dumpSubCorpus("$corpusName:$bibRefs", ICqiClient.CQI_CONST_FIELD_MATCHEND, 0, nmatches-1)

// Qualified name of the word attribute, as expected by cpos2Str
def wordAttribute = "$corpusName.$wordProperty".toString()

// One <rType> unit is created per (chapter, verse) pair of each match; every
// unit of a match spans the whole match.
// The two former branches (with / without book number) were identical except
// for the number handling and are merged; the unused reference form lookup and
// the duplicated chapter/verse lookup (one engine call each per match) are gone.
[starts, targets, ends].transpose().each { start, target, end ->

    // tokens before the target, if any, are the book number ("1", "2.")
    def hasNumber = start < target
    def bookPos = hasNumber ? target : start

    def number = ""
    if (hasNumber) {
        number = corpusEngine.cpos2Str(wordAttribute, (start..(target-1)) as int[]).join(' ')
        // remove trailing '.'
        number = (number ==~ /.*\./) ? number.substring(0, number.length()-1) : number
    }

    def book = corpusEngine.cpos2Str(wordAttribute, (bookPos..bookPos) as int[]).join(' ')

    // everything after the book form is the chapter/verse part
    def chapsVersFormsList = corpusEngine.cpos2Str(wordAttribute, ((bookPos+1)..end) as int[]) as String[]
    def chapsVersForms = chapsVersFormsList.join(' ')
    // NOTE(review): assumes LangFormater.format has no side effects, so it can
    // be computed once per match instead of once per unit -- confirm.
    def rawVerses = LangFormater.format(chapsVersForms, corpus.getLang())

    def chapsVersList = ScanChapsVers(book, number, chapsVersFormsList)

    // units are added in reverse order of the scanned list, as before
    for (reference in chapsVersList.reverse()) {
        def (b, c, v) = reference

        def properties = [:]
        properties["number"] = number
        properties["book"] = b
        properties["chapter"] = c as String
        properties["verse"] = v as String
        properties["raw_verses"] = rawVerses
        properties["cr_id"] = 0 as String
        properties["type"] = "word"

        analecCorpus.addUniteSaisie(rType, start, end, properties)
        nRUnits++
    }
}
|
|
483 |
// }.countBy { it }.sort { a,b -> -a.value <=> -b.value ?: a.key <=> b.key }.each {
|
|
484 |
// }.flatten().countBy { it }.sort { a,b -> -a.value <=> -b.value ?: a.key <=> b.key }.each {
|
|
485 |
// println it.key+"\t"+it.value
|
|
486 |
// }
|
|
487 |
|
|
488 |
// Put the matching strategy back to what it was before the query.
// When the system property was not set (null or empty), the previous code sent
// "set MatchingStrategy null;" to CQP and then called System.setProperty with
// a null value, which throws a NullPointerException *before* the units are
// saved. In that case CQP is set back to 'standard' and the property is
// cleared again.
// NOTE(review): 'standard' is taken to be CQP's default strategy -- confirm.
if (cqp_matching_strategy != 'longest') {
    def previousStrategy = cqp_matching_strategy ?: 'standard'
    println "Changing MatchingStrategy back to '"+previousStrategy+"'"
    corpusEngine.query("set MatchingStrategy "+previousStrategy+";")
    if (cqp_matching_strategy) {
        System.setProperty("cqp_matching_strategy", cqp_matching_strategy)
    } else {
        System.clearProperty("cqp_matching_strategy")
    }
}

println "$nRUnits <$rType> units created."

// Persist the annotation corpus (including the removal of the former units)
print "Saving units... "
URSCorpora.saveCorpus(analecCorpus)
println "Done."

// Flag the corpus as modified only when something was actually created
if (nRUnits > 0) corpus.setIsModified(true)
|
|
501 |
|