Revision 2675 tmp/org.txm.core/src/java/org/txm/scripts/importer/MileStoneProjection.groovy
MileStoneProjection.groovy (revision 2675) | ||
---|---|---|
22 | 22 |
String mileStoneTag |
23 | 23 |
String startTag |
24 | 24 |
boolean start = false |
25 |
|
|
25 |
|
|
26 | 26 |
int mileStoneDistance = 0 |
27 |
|
|
27 |
|
|
28 | 28 |
String mileStoneID = "" |
29 | 29 |
def milestonesLength = [] |
30 | 30 |
int milestonesCounter = 0 |
... | ... | |
33 | 33 |
String msIdAttributeName |
34 | 34 |
String msStartAttributeName |
35 | 35 |
String msEndAttributeName |
36 |
|
|
36 |
|
|
37 | 37 |
public MileStoneProjection(File inputFile, String startTag, String wordTag, String mileStoneTag) { |
38 | 38 |
super(inputFile) |
39 |
|
|
39 |
|
|
40 | 40 |
this.wordTag = wordTag |
41 | 41 |
this.mileStoneTag = mileStoneTag |
42 | 42 |
mileStoneID = mileStoneTag+"_0" |
... | ... | |
46 | 46 |
msIdAttributeName = mileStoneTag+"id"; |
47 | 47 |
msStartAttributeName = mileStoneTag+"start"; |
48 | 48 |
msEndAttributeName = mileStoneTag+"end"; |
49 |
|
|
49 |
|
|
50 | 50 |
fetchMilestoneLengths(); |
51 | 51 |
start = false // reset |
52 | 52 |
} |
53 |
|
|
53 |
|
|
54 | 54 |
public void fetchMilestoneLengths() { |
55 | 55 |
def inputData = inputurl.openStream(); |
56 | 56 |
def factory = XMLInputFactory.newInstance(); |
57 | 57 |
def parser = factory.createXMLStreamReader(inputData); |
58 |
|
|
58 |
|
|
59 | 59 |
for (int event = parser.next(); event != XMLStreamConstants.END_DOCUMENT; event = parser.next()) { |
60 | 60 |
switch (event) { |
61 | 61 |
case XMLStreamConstants.START_ELEMENT: |
... | ... | |
78 | 78 |
milestonesCounter = 0; |
79 | 79 |
//println milestonesLength |
80 | 80 |
} |
81 |
|
|
82 |
|
|
83 | 81 |
|
82 |
|
|
83 |
|
|
84 | 84 |
public void processStartElement() { |
85 | 85 |
super.processStartElement(); |
86 |
|
|
86 |
|
|
87 | 87 |
if (start && localname.equals(mileStoneTag)) { |
88 | 88 |
mileStoneDistance = 0 |
89 | 89 |
mileStoneID = parser.getAttributeValue(null, "id") |
... | ... | |
96 | 96 |
writer.writeAttribute(msEndAttributeName, Integer.toString((milestonesLength[milestonesCounter] - mileStoneDistance - 1))) |
97 | 97 |
writer.writeAttribute(msStartAttributeName, Integer.toString(mileStoneDistance)) |
98 | 98 |
writer.writeAttribute(msIdAttributeName, mileStoneID) |
99 |
|
|
99 |
|
|
100 | 100 |
mileStoneDistance++ |
101 | 101 |
} else if (localname.equals(startTag)) { |
102 | 102 |
start = true |
103 | 103 |
} |
104 | 104 |
} |
105 |
|
|
105 |
|
|
106 | 106 |
public static void main(String[] args) { |
107 |
File inputFile = new File("/home/mdecorde/TXM-0.8.0-dev/corpora/XTZMILESTONES/tokenized/test.xml")
|
|
108 |
File outputFile = new File("/home/mdecorde/TXM-0.8.0-dev/corpora/XTZMILESTONES/tokenized/result.xml")
|
|
109 |
|
|
107 |
File inputFile = new File(System.getProperty("user.home"), "TXM-0.8.0-dev/corpora/XTZMILESTONES/tokenized/test.xml")
|
|
108 |
File outputFile = new File(System.getProperty("user.home"), "TXM-0.8.0-dev/corpora/XTZMILESTONES/tokenized/result.xml")
|
|
109 |
|
|
110 | 110 |
MileStoneProjection msp = new MileStoneProjection(inputFile, "text", "w", "lb"); |
111 | 111 |
println "Sucess: "+msp.process(outputFile) |
112 | 112 |
} |
Also available in: Unified diff