Révision 2789
| tmp/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/ReadRegistryFile.java (revision 2789) | ||
|---|---|---|
| 3 | 3 |
import java.io.File; |
| 4 | 4 |
import java.util.ArrayList; |
| 5 | 5 |
import java.util.HashMap; |
| 6 |
import java.util.HashSet; |
|
| 6 | 7 |
|
| 7 | 8 |
import org.txm.utils.io.IOUtils; |
| 8 | 9 |
|
| 10 |
/** |
|
| 11 |
* Read a registry file and retrieve the declared p-attribute and s-attribute information. |
|
| 12 |
* |
|
| 13 |
* Call the constructor, then use getPAttributes() and getSAttributes() to get the cwb-encode arguments |
|
| 14 |
* |
|
| 15 |
* or use getSAttributesMap(), getSAttributesProfs() and getAnatypes() to get the declared attributes |
|
| 16 |
* @author mdecorde |
|
| 17 |
* |
|
| 18 |
*/ |
|
| 9 | 19 |
public class ReadRegistryFile {
|
| 10 | 20 |
File registryFile; |
| 11 | 21 |
ArrayList<String> pAttributes; |
| 12 | 22 |
ArrayList<String> sAttributes; |
| 13 |
HashMap<String, ArrayList<String>> sattrs; |
|
| 23 |
HashMap<String, HashSet<String>> sattrs; |
|
| 24 |
HashMap<String, Integer> sattrsProfs; |
|
| 14 | 25 |
|
| 15 | 26 |
public ReadRegistryFile(File registryFile) {
|
| 16 | 27 |
this.registryFile = registryFile; |
| 17 | 28 |
read(); |
| 18 | 29 |
} |
| 19 | 30 |
|
| 31 |
/* |
|
| 32 |
* Reload the information from the registry file. |
|
| 33 |
*/ |
|
| 20 | 34 |
public void read() {
|
| 21 | 35 |
pAttributes = new ArrayList(); |
| 22 | 36 |
sAttributes = new ArrayList(); |
| 23 |
sattrs = new HashMap<String, ArrayList<String>>(); |
|
| 37 |
sattrs = new HashMap<String, HashSet<String>>(); |
|
| 38 |
sattrsProfs = new HashMap<String, Integer>(); |
|
| 39 |
|
|
| 24 | 40 |
for (String line : IOUtils.getLines(registryFile, System.getProperty("file.encoding"))) {
|
| 25 | 41 |
line = line.trim(); // remove first tab |
| 26 | 42 |
|
| ... | ... | |
| 32 | 48 |
line = line.replaceAll("\\#.*", "");
|
| 33 | 49 |
line = line.trim(); |
| 34 | 50 |
String[] split = line.split("_", 2);
|
| 51 |
String sname = split[0]; |
|
| 35 | 52 |
//println split |
| 36 | 53 |
if (split.length == 1) { // sattr decl
|
| 37 |
sattrs.put(split[0], new ArrayList<String>()); |
|
| 54 |
if (sname.matches(".+[1-9]") && sattrs.containsKey(sname.substring(0, sname.length()-1))) { // recursive structure
|
|
| 55 |
sname = sname.substring(0, sname.length()-1); |
|
| 56 |
sattrsProfs.put(sname, sattrsProfs.get(sname)+1); |
|
| 57 |
} else {
|
|
| 58 |
sattrs.put(sname, new HashSet<String>()); |
|
| 59 |
sattrsProfs.put(sname, 0); |
|
| 60 |
} |
|
| 38 | 61 |
} else {
|
| 39 |
sattrs.get(split[0]).add(split[1]); |
|
| 62 |
String satt = split[1]; |
|
| 63 |
if (satt.matches(".+[1-9]") && sattrs.get(sname).contains(satt.substring(0, satt.length()-1))) {
|
|
| 64 |
// recursive attribute -> to be ignored |
|
| 65 |
} else {
|
|
| 66 |
sattrs.get(sname).add(satt); |
|
| 67 |
} |
|
| 40 | 68 |
} |
| 41 | 69 |
} |
| 42 | 70 |
} |
| 43 | 71 |
|
| 44 | 72 |
for (String sattr : sattrs.keySet()) {
|
| 45 |
String tmp = ""+sattr+":0";
|
|
| 46 |
for (String attr : sattrs.get(sattr)) |
|
| 73 |
String tmp = ""+sattr+":"+sattrsProfs.get(sattr);
|
|
| 74 |
for (String attr : sattrs.get(sattr)) {
|
|
| 47 | 75 |
tmp += "+"+attr; |
| 76 |
} |
|
| 48 | 77 |
sAttributes.add(tmp); |
| 49 | 78 |
} |
| 50 | 79 |
} |
| 51 | 80 |
|
| 81 |
/** |
|
| 82 |
* |
|
| 83 |
* @return the cwb-encode arguments for p attributes |
|
| 84 |
*/ |
|
| 52 | 85 |
public ArrayList<String> getPAttributes() {
|
| 53 | 86 |
return pAttributes; |
| 54 | 87 |
} |
| 55 | 88 |
|
| 89 |
/** |
|
| 90 |
* |
|
| 91 |
* @return the cwb-encode arguments for s attributes |
|
| 92 |
*/ |
|
| 56 | 93 |
public ArrayList<String> getSAttributes() {
|
| 57 | 94 |
return sAttributes; |
| 58 | 95 |
} |
| 59 | 96 |
|
| 60 |
public HashMap<String, ArrayList<String>> getSAttributesMap() {
|
|
| 97 |
/** |
|
| 98 |
* |
|
| 99 |
* @return the attributes of the structures |
|
| 100 |
*/ |
|
| 101 |
public HashMap<String, HashSet<String>> getSAttributesMap() {
|
|
| 61 | 102 |
return sattrs; |
| 62 | 103 |
} |
| 63 | 104 |
|
| 105 |
/** |
|
| 106 |
* |
|
| 107 |
* @return the recursion level of each structure, keyed by structure name |
|
| 108 |
*/ |
|
| 109 |
public HashMap<String, Integer> getSAttributesProfs() {
|
|
| 110 |
return sattrsProfs; |
|
| 111 |
} |
|
| 112 |
|
|
| 64 | 113 |
public static void main(String[] args) {
|
| 65 |
File registry = new File("/home/matt/xml/cqp/discours");
|
|
| 114 |
File registry = new File(System.getProperty("user.home"), "runtime-rcpapplication.product/corpora/ANNOTATIONCONC/registry/annotationconc");
|
|
| 66 | 115 |
ReadRegistryFile reader = new ReadRegistryFile(registry); |
| 67 | 116 |
System.out.println("pAttributes: "+reader.getPAttributes());
|
| 68 | 117 |
System.out.println("sAttributes Map: "+reader.getSAttributesMap());
|
Formats disponibles : Unified diff