Révision 2789

tmp/org.txm.searchengine.cqp.core/src/org/txm/importer/cwb/ReadRegistryFile.java (revision 2789)
3 3
import java.io.File;
4 4
import java.util.ArrayList;
5 5
import java.util.HashMap;
6
import java.util.HashSet;
6 7

  
7 8
import org.txm.utils.io.IOUtils;
8 9

  
10
/**
11
 * Reads a registry file and retrieves the declared p-attribute and s-attribute information.
12
 * 
13
 * Call the constructor, then use getPAttributes and getSAttributes for cwb-encode
14
 * 
15
 * or use getSAttributesMap, getSAttributesProfs and getAnatypes() to get the declared attributes
16
 * @author mdecorde
17
 *
18
 */
9 19
public class ReadRegistryFile {
10 20
	File registryFile;
11 21
	ArrayList<String> pAttributes;
12 22
	ArrayList<String> sAttributes;
13
	HashMap<String, ArrayList<String>> sattrs;
23
	HashMap<String, HashSet<String>> sattrs;
24
	HashMap<String, Integer> sattrsProfs;
14 25
	
15 26
	public ReadRegistryFile(File registryFile) {
16 27
		this.registryFile = registryFile;
17 28
		read();
18 29
	}
19 30
	
31
	/*
32
	 * Reloads the information from the registry file.
33
	 */
20 34
	public void read() {
21 35
		pAttributes = new ArrayList();
22 36
		sAttributes = new ArrayList();
23
		sattrs = new HashMap<String, ArrayList<String>>();
37
		sattrs = new HashMap<String, HashSet<String>>();
38
		sattrsProfs = new HashMap<String, Integer>();
39
		
24 40
		for (String line : IOUtils.getLines(registryFile, System.getProperty("file.encoding"))) {
25 41
			line = line.trim(); // remove first tab
26 42

  
......
32 48
				line = line.replaceAll("\\#.*", "");
33 49
				line = line.trim();
34 50
				String[] split = line.split("_", 2);
51
				String sname = split[0];
35 52
				//println split
36 53
				if (split.length == 1) { // sattr decl
37
					sattrs.put(split[0], new ArrayList<String>());
54
					if (sname.matches(".+[1-9]") && sattrs.containsKey(sname.substring(0, sname.length()-1))) { // recursive structure
55
						sname = sname.substring(0, sname.length()-1);
56
						sattrsProfs.put(sname, sattrsProfs.get(sname)+1);
57
					} else {
58
						sattrs.put(sname, new HashSet<String>());
59
						sattrsProfs.put(sname, 0);
60
					}
38 61
				} else {
39
					sattrs.get(split[0]).add(split[1]);
62
					String satt = split[1];
63
					if (satt.matches(".+[1-9]") && sattrs.get(sname).contains(satt.substring(0, satt.length()-1))) {
64
						// recursive attribute -> to be ignored
65
					} else {
66
						sattrs.get(sname).add(satt);
67
					}
40 68
				}
41 69
			}
42 70
		}
43 71
		
44 72
		for (String sattr : sattrs.keySet()) {
45
			String tmp = ""+sattr+":0";
46
			for (String attr : sattrs.get(sattr))
73
			String tmp = ""+sattr+":"+sattrsProfs.get(sattr);
74
			for (String attr : sattrs.get(sattr)) {
47 75
				tmp += "+"+attr;
76
			}
48 77
			sAttributes.add(tmp);
49 78
		}
50 79
	}
51 80
	
81
	/**
82
	 * 
83
	 * @return the cwb-encode arguments for p attributes
84
	 */
52 85
	public ArrayList<String> getPAttributes() {
53 86
		return pAttributes;
54 87
	}
55 88
	
89
	/**
90
	 * 
91
	 * @return the cwb-encode arguments for s attributes
92
	 */
56 93
	public ArrayList<String> getSAttributes() {
57 94
		return sAttributes;
58 95
	}
59 96
	
60
	public HashMap<String, ArrayList<String>> getSAttributesMap() {
97
	/**
98
	 * 
99
	 * @return the attributes of the structures
100
	 */
101
	public HashMap<String, HashSet<String>> getSAttributesMap() {
61 102
		return sattrs;
62 103
	}
63 104
	
105
	/**
106
	 * 
107
	 * @return the recursion level of each structure, keyed by structure name
108
	 */
109
	public HashMap<String, Integer> getSAttributesProfs() {
110
		return sattrsProfs;
111
	}
112
	
64 113
	public static void main(String[] args) {
65
		File registry = new File("/home/matt/xml/cqp/discours");
114
		File registry = new File(System.getProperty("user.home"), "runtime-rcpapplication.product/corpora/ANNOTATIONCONC/registry/annotationconc");
66 115
		ReadRegistryFile reader = new ReadRegistryFile(registry);
67 116
		System.out.println("pAttributes: "+reader.getPAttributes());
68 117
		System.out.println("sAttributes Map: "+reader.getSAttributesMap());

Formats disponibles : Unified diff