Statistics
| Revision:

root / tmp / org.txm.core / src / groovy / org / txm / scripts / FixEditionPageIds.groovy @ 187

History | View | Annotate | Download (1.9 kB)

1
package org.txm.scripts

import java.nio.file.Files
import java.nio.file.StandardCopyOption

/*
 * Rewrites the "id" attribute of every <page> element declared in a corpus
 * import.xml so that it matches the HTML page files found on disk.
 *
 * For each corpora/corpus/texts/text/editions/edition element, the ".html"
 * files of the directory HTMLs/<edition name> are sorted, and the k-th <page>
 * element receives the part of the k-th file name located between the last
 * "_" and the ".html" extension. The updated document is written to
 * params2.xml, which then replaces import.xml.
 *
 * NOTE(review): DomUtils is not imported in this file; presumably it is
 * org.txm.utils.xml.DomUtils or is supplied by the script runner - confirm.
 */

// Hard-coded locations of the corpus being fixed.
String corpus = "qgraal"
File PARAMS = new File("/home/mdecorde/TXM/corpora/$corpus/import.xml")
File PARAMS2 = new File("/home/mdecorde/TXM/corpora/$corpus/params2.xml")
File HTMLs = new File("/home/mdecorde/TXM/corpora/$corpus/HTML/QGRAALFRO")

// Returns the first descendant of 'parent' named 'tag', failing with an
// explicit message instead of a NullPointerException when there is none.
def firstElement = { parent, String tag ->
	def found = parent.getElementsByTagName(tag)
	if (found.getLength() == 0) {
		throw new IllegalStateException("No <" + tag + "> element found in " + PARAMS)
	}
	return found.item(0)
}

def doc = DomUtils.load(PARAMS)
def root = doc.getDocumentElement()

// Only the first <corpora>, <corpus> and <texts> elements are processed.
def corporaElem = firstElement(root, "corpora")
def corpusElem = firstElement(corporaElem, "corpus")
def textsElem = firstElement(corpusElem, "texts")

def textList = textsElem.getElementsByTagName("text")
for (int i = 0; i < textList.getLength(); i++) {
	def textElem = textList.item(i)
	String textname = textElem.getAttribute("name")
	println "process text: "+textname

	def editionsElem = firstElement(textElem, "editions")
	def editionList = editionsElem.getElementsByTagName("edition")

	for (int j = 0; j < editionList.getLength(); j++) {
		def editionElem = editionList.item(j)
		String editionName = editionElem.getAttribute("name")

		// Keep regular ".html" files only: sub-directories or stray files would
		// shift the page/file pairing and break the id extraction below.
		File editionDir = new File(HTMLs, editionName)
		File[] pages = editionDir.listFiles({ File f ->
			f.isFile() && f.getName().endsWith(".html")
		} as FileFilter)
		if (pages == null) {
			// listFiles() returns null when the directory is missing or unreadable
			println "Error: cannot list HTML pages of edition directory "+editionDir
			continue
		}
		// Lexicographic path order, as in the original script.
		// NOTE(review): "x_10.html" sorts before "x_2.html" unless the page
		// numbers are zero-padded - confirm the naming of the HTML files.
		Arrays.sort(pages)

		def pageList = editionElem.getElementsByTagName("page")
		println pageList.getLength()
		println pages.size()

		// Report the mismatch before assigning, and never index past the
		// shorter of the two sequences.
		if (pageList.getLength() != pages.size()) {
			println "Error size pageList.getLength() != pages.size()"+pageList.getLength()+", "+pages.size()
		}
		int pairCount = Math.min(pageList.getLength(), pages.length)

		for (int k = 0; k < pairCount; k++) {
			def pageElem = pageList.item(k)
			String fileName = pages[k].getName()
			// The id is the text after the last "_" (or from the start of the
			// name when there is no "_") up to the ".html" extension.
			int start = fileName.lastIndexOf("_") + 1
			int end = fileName.length() - ".html".length()
			pageElem.setAttribute("id", fileName.substring(start, end))
		}
	}
}

DomUtils.save(doc, PARAMS2)

// Replace import.xml in a single move so that a failure cannot leave the
// corpus without any parameter file (delete-then-rename could).
try {
	Files.move(PARAMS2.toPath(), PARAMS.toPath(), StandardCopyOption.REPLACE_EXISTING)
} catch (IOException e) {
	println "Warning can't rename file "+PARAMS2+" to "+PARAMS+": "+e
}