1 |
1 |
package org.txm.searchengine.cqp.corpus;
|
2 |
2 |
|
|
3 |
import java.util.ArrayList;
|
|
4 |
|
3 |
5 |
import org.txm.objects.Edition;
|
4 |
6 |
import org.txm.objects.Page;
|
|
7 |
import org.txm.objects.Project;
|
5 |
8 |
import org.txm.objects.Text;
|
|
9 |
import org.txm.searchengine.cqp.corpus.query.CQLQuery;
|
6 |
10 |
import org.txm.utils.TXMProgressMonitor;
|
|
11 |
import org.txm.utils.io.IOUtils;
|
7 |
12 |
|
8 |
13 |
/**
|
9 |
14 |
* Build on-the-fly HTML pages from the CQP indexes (words + structures)
|
... | ... | |
12 |
17 |
*
|
13 |
18 |
*/
|
14 |
19 |
public class CQPEdition extends Edition {
|
15 |
|
|
|
20 |
|
|
21 |
// Pages whose HTML file has already been written by getPage(int), in insertion order (oldest first); emptied by _compute.
ArrayList<Page> cache = new ArrayList<>();
|
|
22 |
// Maximum number of pages kept in the cache; once exceeded, getPage(int) evicts the oldest page and deletes its file.
int MAX = 20;
|
|
23 |
|
16 |
24 |
public CQPEdition(Text text) {
|
17 |
25 |
|
18 |
26 |
super(text);
|
... | ... | |
25 |
33 |
|
26 |
34 |
@Override
|
27 |
35 |
protected boolean _compute(TXMProgressMonitor monitor) throws Exception {
|
28 |
|
// pPageNames.cl
|
29 |
|
// pPageFirstWordIds
|
30 |
|
return true;
|
|
36 |
pPageNames.clear();
|
|
37 |
pPageFirstWordIds.clear();
|
|
38 |
cache.clear();
|
|
39 |
|
|
40 |
Project p = getProject();
|
|
41 |
MainCorpus corpus = p.getChildren(MainCorpus.class).get(0);
|
|
42 |
String paginantionElementName = p.getEditionDefinition("cqp").getPageElement();
|
|
43 |
StructuralUnit structure = corpus.getStructuralUnit(paginantionElementName);
|
|
44 |
if (structure == null) {
|
|
45 |
paginantionElementName = "p";
|
|
46 |
structure = corpus.getStructuralUnit(paginantionElementName);
|
|
47 |
if (structure == null) {
|
|
48 |
paginantionElementName = "div";
|
|
49 |
structure = corpus.getStructuralUnit(paginantionElementName);
|
|
50 |
if (structure == null) {
|
|
51 |
paginantionElementName = "text";
|
|
52 |
}
|
|
53 |
}
|
|
54 |
}
|
|
55 |
QueryResult qr = corpus.query(new CQLQuery("<"+paginantionElementName+">[]"), "TMP", false);
|
|
56 |
|
|
57 |
String[] ids = corpus.getProperty("id").cpos2Str(qr.getStarts());
|
|
58 |
int nPages = qr.size;
|
|
59 |
|
|
60 |
for (int i = 0 ; i <= nPages ; i++) {
|
|
61 |
pPageNames.add(""+i);
|
|
62 |
pPageFirstWordIds.add(ids[i]);
|
|
63 |
}
|
|
64 |
qr.drop(); // no more usefull
|
|
65 |
return nPages > 0;
|
31 |
66 |
}
|
32 |
67 |
|
33 |
68 |
/**
|
... | ... | |
39 |
74 |
public Page getPage(int n) {
|
40 |
75 |
|
41 |
76 |
if (n >= 0 && pPageNames.size() > n) {
|
|
77 |
|
42 |
78 |
String wId = "w_0"; // default word id
|
43 |
79 |
if (pPageFirstWordIds.size() > n) {
|
44 |
80 |
wId = pPageFirstWordIds.get(n);
|
45 |
81 |
}
|
46 |
|
return new Page(this, pPageNames.get(n), wId, n);
|
|
82 |
Page p = new Page(this, pPageNames.get(n), wId, n);
|
|
83 |
if (cache.contains(p)) {
|
|
84 |
|
|
85 |
return p; // html file is alredy built
|
|
86 |
}
|
|
87 |
p.getFile().getParentFile().mkdirs();
|
|
88 |
IOUtils.write(p.getFile(), "<html><body>TODO n="+n+ "w="+wId+"</body></html>");
|
|
89 |
|
|
90 |
cache.add(p);
|
|
91 |
if (cache.size() > MAX) {
|
|
92 |
Page toClean = cache.remove(0);
|
|
93 |
toClean.getFile().delete();
|
|
94 |
}
|
|
95 |
return p;
|
47 |
96 |
}
|
48 |
97 |
else {
|
49 |
98 |
return null;
|