Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / FixEditionPageIds.groovy @ 479

History | View | Annotate | Download (1.9 kB)

1
package org.txm.scripts
2

    
3
import org.txm.utils.xml.DomUtils;
4

    
5
// Name of the corpus whose import parameters are patched.
String corpus = "qgraal"
// Import parameters of the corpus: loaded here, replaced at the end of the script.
File PARAMS = new File("/home/mdecorde/TXM/corpora/$corpus/import.xml")
// Scratch file the patched parameters are written to before replacing PARAMS.
File PARAMS2 = new File("/home/mdecorde/TXM/corpora/$corpus/params2.xml")
// Directory holding one sub-directory of HTML page files per edition name.
File HTMLs = new File("/home/mdecorde/TXM/corpora/$corpus/HTML/QGRAALFRO")

// Returns the first descendant of 'parent' named 'tag'. Aborts with a message
// naming the missing element rather than letting a null reference surface as a
// NullPointerException on the following lookup.
def firstElement = { parent, String tag ->
	def found = parent.getElementsByTagName(tag).item(0)
	if (found == null) {
		throw new IllegalStateException("No <" + tag + "> element found in " + PARAMS)
	}
	return found
}

def doc = DomUtils.load(PARAMS)
def root = doc.getDocumentElement();

// Walk down corpora > corpus > texts, taking the first element at each level.
def corporaElem = firstElement(root, "corpora")
def corpusElem = firstElement(corporaElem, "corpus")
def textsElem = firstElement(corpusElem, "texts")
21

    
22
// Extracts the page id from the name of an HTML page file: the text between the
// last "_" and the trailing ".html" (the whole base name when there is no "_").
// Only called with names that end with ".html".
def pageIdOf = { String fileName ->
	int start = fileName.lastIndexOf("_") + 1
	int end = fileName.length() - ".html".length()
	return fileName.substring(start, end)
}

// For every edition of every text, rewrite the "id" attribute of its <page>
// elements from the names of the HTML files found in HTMLs/<edition name>.
def textList = textsElem.getElementsByTagName("text")
for (int i = 0 ; i < textList.getLength() ; i++)
{
	def textElem = textList.item(i)
	String textname = textElem.getAttribute("name");
	println "process text: "+textname

	def editionsElem = textElem.getElementsByTagName("editions").item(0)
	if (editionsElem == null)
	{
		// Nothing to fix for a text that declares no editions.
		println "Warning no editions element for text "+textname
		continue
	}

	def editionList = editionsElem.getElementsByTagName("edition")
	for (int j = 0 ; j < editionList.getLength() ; j++)
	{
		def editionElem = editionList.item(j)
		String editionName = editionElem.getAttribute("name");

		File editionDir = new File(HTMLs, editionName)
		def listing = editionDir.listFiles()
		if (listing == null)
		{
			// listFiles() returns null when the directory is missing or unreadable.
			// Abort so the parameters file is not rewritten from incomplete data.
			throw new IllegalStateException("Can't list HTML pages of edition "+editionName+" in "+editionDir)
		}

		// Keep only regular *.html files: anything else cannot yield a page id
		// and would shift the positional pairing below.
		// NOTE(review): files are paired with <page> elements by position after a
		// lexicographic sort ("..._10.html" sorts before "..._2.html") - confirm
		// the <page> elements are declared in that same order.
		def pages = listing.findAll { it.isFile() && it.getName().endsWith(".html") }.sort()

		def pageList = editionElem.getElementsByTagName("page")
		println pageList.getLength()
		println pages.size()

		// Report a count mismatch before indexing into 'pages', not after.
		if (pageList.getLength() != pages.size())
		{
			println "Error size pageList.getLength() != pages.size()"+pageList.getLength()+", "+pages.size()
		}
		if (pages.size() < pageList.getLength())
		{
			// Too few files to give every <page> an id: stop here, before the
			// parameters file is overwritten with a partially fixed document.
			throw new IllegalStateException("Edition "+editionName+" of text "+textname+" declares "+pageList.getLength()+" pages but only "+pages.size()+" HTML files were found in "+editionDir)
		}

		for (int k = 0 ; k < pageList.getLength() ; k++)
		{
			def pageElem = pageList.item(k);
			pageElem.setAttribute("id", pageIdOf(pages[k].getName()));
		}
	}
}
60

    
61
// Write the patched document to the scratch file first, then put it in place of
// the original parameters file (delete + rename).
DomUtils.save(doc, PARAMS2);

if (!PARAMS.delete())
{
	// The original is still in place; say where the patched copy was left.
	println "Warning can't delete file "+PARAMS+", updated parameters are in "+PARAMS2
}
else if (!PARAMS2.renameTo(PARAMS))
{
	// The original is already deleted at this point: the patched document only
	// exists under the scratch name.
	println "Warning can't rename file "+PARAMS2+" to "+PARAMS
}
64