Révision 2789
tmp/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/ReadRegistryFile.java (revision 2789) | ||
---|---|---|
3 | 3 |
import java.io.File; |
4 | 4 |
import java.util.ArrayList; |
5 | 5 |
import java.util.HashMap; |
6 |
import java.util.HashSet; |
|
6 | 7 |
|
7 | 8 |
import org.txm.utils.io.IOUtils; |
8 | 9 |
|
10 |
/** |
|
11 |
* Reads a registry file and retrieves the declared p-attribute and s-attribute information. |
|
12 |
* |
|
13 |
* Call the constructor, then use getPAttributes() and getSAttributes() to get the arguments for cwb-encode |
|
14 |
* |
|
15 |
* or use getSAttributesMap(), getSAttributesProfs() and getAnatypes() to get the declared attributes |
|
16 |
* @author mdecorde |
|
17 |
* |
|
18 |
*/ |
|
9 | 19 |
public class ReadRegistryFile { |
10 | 20 |
File registryFile; |
11 | 21 |
ArrayList<String> pAttributes; |
12 | 22 |
ArrayList<String> sAttributes; |
13 |
HashMap<String, ArrayList<String>> sattrs; |
|
23 |
HashMap<String, HashSet<String>> sattrs; |
|
24 |
HashMap<String, Integer> sattrsProfs; |
|
14 | 25 |
|
15 | 26 |
public ReadRegistryFile(File registryFile) { |
16 | 27 |
this.registryFile = registryFile; |
17 | 28 |
read(); |
18 | 29 |
} |
19 | 30 |
|
31 |
/* |
|
32 |
* Reloads the information from the registry file |
|
33 |
*/ |
|
20 | 34 |
public void read() { |
21 | 35 |
pAttributes = new ArrayList(); |
22 | 36 |
sAttributes = new ArrayList(); |
23 |
sattrs = new HashMap<String, ArrayList<String>>(); |
|
37 |
sattrs = new HashMap<String, HashSet<String>>(); |
|
38 |
sattrsProfs = new HashMap<String, Integer>(); |
|
39 |
|
|
24 | 40 |
for (String line : IOUtils.getLines(registryFile, System.getProperty("file.encoding"))) { |
25 | 41 |
line = line.trim(); // remove first tab |
26 | 42 |
|
... | ... | |
32 | 48 |
line = line.replaceAll("\\#.*", ""); |
33 | 49 |
line = line.trim(); |
34 | 50 |
String[] split = line.split("_", 2); |
51 |
String sname = split[0]; |
|
35 | 52 |
//println split |
36 | 53 |
if (split.length == 1) { // sattr decl |
37 |
sattrs.put(split[0], new ArrayList<String>()); |
|
54 |
if (sname.matches(".+[1-9]") && sattrs.containsKey(sname.substring(0, sname.length()-1))) { // recursive structure |
|
55 |
sname = sname.substring(0, sname.length()-1); |
|
56 |
sattrsProfs.put(sname, sattrsProfs.get(sname)+1); |
|
57 |
} else { |
|
58 |
sattrs.put(sname, new HashSet<String>()); |
|
59 |
sattrsProfs.put(sname, 0); |
|
60 |
} |
|
38 | 61 |
} else { |
39 |
sattrs.get(split[0]).add(split[1]); |
|
62 |
String satt = split[1]; |
|
63 |
if (satt.matches(".+[1-9]") && sattrs.get(sname).contains(satt.substring(0, satt.length()-1))) { |
|
64 |
// recursive attribute -> to be ignored |
|
65 |
} else { |
|
66 |
sattrs.get(sname).add(satt); |
|
67 |
} |
|
40 | 68 |
} |
41 | 69 |
} |
42 | 70 |
} |
43 | 71 |
|
44 | 72 |
for (String sattr : sattrs.keySet()) { |
45 |
String tmp = ""+sattr+":0";
|
|
46 |
for (String attr : sattrs.get(sattr)) |
|
73 |
String tmp = ""+sattr+":"+sattrsProfs.get(sattr);
|
|
74 |
for (String attr : sattrs.get(sattr)) {
|
|
47 | 75 |
tmp += "+"+attr; |
76 |
} |
|
48 | 77 |
sAttributes.add(tmp); |
49 | 78 |
} |
50 | 79 |
} |
51 | 80 |
|
81 |
/** |
|
82 |
* |
|
83 |
* @return the cwb-encode arguments for p attributes |
|
84 |
*/ |
|
52 | 85 |
public ArrayList<String> getPAttributes() { |
53 | 86 |
return pAttributes; |
54 | 87 |
} |
55 | 88 |
|
89 |
/** |
|
90 |
* |
|
91 |
* @return the cwb-encode arguments for s attributes |
|
92 |
*/ |
|
56 | 93 |
public ArrayList<String> getSAttributes() { |
57 | 94 |
return sAttributes; |
58 | 95 |
} |
59 | 96 |
|
60 |
public HashMap<String, ArrayList<String>> getSAttributesMap() { |
|
97 |
/** |
|
98 |
* |
|
99 |
* @return a map from each structure name to the set of its attribute names |
|
100 |
*/ |
|
101 |
public HashMap<String, HashSet<String>> getSAttributesMap() { |
|
61 | 102 |
return sattrs; |
62 | 103 |
} |
63 | 104 |
|
105 |
/** |
|
106 |
* |
|
107 |
* @return a map from each structure name to its recursion depth (0 when the structure is not recursive) |
|
108 |
*/ |
|
109 |
public HashMap<String, Integer> getSAttributesProfs() { |
|
110 |
return sattrsProfs; |
|
111 |
} |
|
112 |
|
|
64 | 113 |
public static void main(String[] args) { |
65 |
File registry = new File("/home/matt/xml/cqp/discours");
|
|
114 |
File registry = new File(System.getProperty("user.home"), "runtime-rcpapplication.product/corpora/ANNOTATIONCONC/registry/annotationconc");
|
|
66 | 115 |
ReadRegistryFile reader = new ReadRegistryFile(registry); |
67 | 116 |
System.out.println("pAttributes: "+reader.getPAttributes()); |
68 | 117 |
System.out.println("sAttributes Map: "+reader.getSAttributesMap()); |
Formats disponibles : Unified diff