root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / filters / OneTagPerLine / OneTagPerLine.groovy @ 1000
History | View | Annotate | Download (2.3 kB)
1 | 321 | mdecorde | // Copyright © 2010-2013 ENS de Lyon.
|
---|---|---|---|
2 | 321 | mdecorde | // Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 | 321 | mdecorde | // Lyon 2, University of Franche-Comté, University of Nice
|
4 | 321 | mdecorde | // Sophia Antipolis, University of Paris 3.
|
5 | 321 | mdecorde | //
|
6 | 321 | mdecorde | // The TXM platform is free software: you can redistribute it
|
7 | 321 | mdecorde | // and/or modify it under the terms of the GNU General Public
|
8 | 321 | mdecorde | // License as published by the Free Software Foundation,
|
9 | 321 | mdecorde | // either version 2 of the License, or (at your option) any
|
10 | 321 | mdecorde | // later version.
|
11 | 321 | mdecorde | //
|
12 | 321 | mdecorde | // The TXM platform is distributed in the hope that it will be
|
13 | 321 | mdecorde | // useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 | 321 | mdecorde | // warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 | 321 | mdecorde | // PURPOSE. See the GNU General Public License for more
|
16 | 321 | mdecorde | // details.
|
17 | 321 | mdecorde | //
|
18 | 321 | mdecorde | // You should have received a copy of the GNU General
|
19 | 321 | mdecorde | // Public License along with the TXM platform. If not, see
|
20 | 321 | mdecorde | // http://www.gnu.org/licenses.
|
21 | 321 | mdecorde | //
|
22 | 321 | mdecorde | //
|
23 | 321 | mdecorde | //
|
24 | 321 | mdecorde | // $LastChangedDate:$
|
25 | 321 | mdecorde | // $LastChangedRevision:$
|
26 | 321 | mdecorde | // $LastChangedBy:$
|
27 | 321 | mdecorde | //
|
28 | 1000 | mdecorde | package org.txm.scripts.filters.OneTagPerLine;
|
29 | 321 | mdecorde | |
30 | 321 | mdecorde | //Pre-processing extra-word tags (1)
|
31 | 1000 | mdecorde | import org.txm.importer.scripts.filters.*; |
32 | 321 | mdecorde | import java.util.regex.*; |
33 | 321 | mdecorde | import org.txm.tokenizer.TokenizerClasses; |
34 | 321 | mdecorde | |
/**
 * Filter that rewrites the current {@code line} so that every tag matched by
 * {@code tag_all} starts on a new output line.
 *
 * The line is consumed from left to right as an alternation of
 * <ul>
 *   <li>text runs (one or more characters other than {@code <}), written as-is, and</li>
 *   <li>tags (a prefix matching {@code tag_all}), written preceded by {@code "\n"}.</li>
 * </ul>
 * A whitespace-only remainder ends the processing and is not written.
 */
class OneTagPerLine extends Filter {

    /** Matches a remainder that is empty or made only of whitespace. */
    private static final Pattern BLANK_REMAINDER = ~/\A\s*\Z/

    /** Group 1: leading run of non-'<' characters; group 2: everything after it. */
    private static final Pattern LEADING_TEXT = ~/^([^<]+)(.*)$/

    /** Regular-expression source describing a tag, taken from TokenizerClasses. */
    String tag_all = TokenizerClasses.tag_all;

    /** Counter; not used by any code in this class. */
    int counterreg1 = 0;

    /** Counter; not used by any code in this class. */
    int counterreg2 = 0;

    /** Value of {@code tag_all} that {@link #cachedLeadingTag} was compiled from. */
    private String cachedTagSource

    /** Compiled form of {@code /^($tag_all)(.*)$/}, built on first use. */
    private Pattern cachedLeadingTag

    /**
     * No parameter is used by this filter; the argument is ignored.
     *
     * @param args ignored
     */
    void SetUsedParam(Object args) {

    }

    /**
     * Hook run before filtering; does nothing.
     *
     * NOTE(review): declared boolean but has no return statement, so the
     * result is presumably false (Groovy's default) - confirm this is the
     * value the Filter base class expects.
     */
    boolean before() {

    }

    /**
     * Hook run after filtering; does nothing.
     */
    void after() {

    }

    /**
     * Splits the current {@code line} into text runs and tags and writes them
     * to {@code output}.
     *
     * If the first segment of the line is text, {@code lineSeparator} is
     * written before it. Each tag is written preceded by {@code "\n"}. When the
     * remainder is neither text nor a tag, "Error in " followed by the
     * remainder is printed and the rest of the line is dropped.
     */
    void filter()
    {
        def m
        // true until the first segment (text or tag) of this line has been handled
        boolean firstTest = true

        while (!(line ==~ BLANK_REMAINDER))
        {
            if ((m = line =~ LEADING_TEXT)) // text up to the next '<' (or end of line)
            {
                if (firstTest)
                {
                    output.write(lineSeparator)
                    firstTest = false
                }
                output.write(m[0][1])
                line = m[0][2]
            }
            else if ((m = line =~ leadingTagPattern())) // a tag at the start of the remainder
            {
                firstTest = false
                // NOTE(review): hard-coded "\n" here while the text branch uses
                // lineSeparator - confirm whether the difference is intended.
                output.write("\n" + m[0][1])
                line = m[0][2]
            }
            else
            {
                println "Error in "+ line
                // a blank remainder makes the loop condition fail and ends processing
                line = " "
            }
        }
    }

    /**
     * Returns the compiled pattern for "tag at start of line, then the rest".
     *
     * The pattern is compiled only on first use and again whenever
     * {@code tag_all} has been changed, instead of once per loop iteration.
     */
    private Pattern leadingTagPattern()
    {
        if (cachedLeadingTag == null || cachedTagSource != tag_all)
        {
            cachedTagSource = tag_all
            cachedLeadingTag = ~/^($tag_all)(.*)$/
        }
        return cachedLeadingTag
    }
}