Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / filters / ProcessEnclitics / ProcessEnclitics.groovy @ 1000

History | View | Annotate | Download (2.5 kB)

1 321 mdecorde
// Copyright © 2010-2013 ENS de Lyon.
2 321 mdecorde
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3 321 mdecorde
// Lyon 2, University of Franche-Comté, University of Nice
4 321 mdecorde
// Sophia Antipolis, University of Paris 3.
5 321 mdecorde
//
6 321 mdecorde
// The TXM platform is free software: you can redistribute it
7 321 mdecorde
// and/or modify it under the terms of the GNU General Public
8 321 mdecorde
// License as published by the Free Software Foundation,
9 321 mdecorde
// either version 2 of the License, or (at your option) any
10 321 mdecorde
// later version.
11 321 mdecorde
//
12 321 mdecorde
// The TXM platform is distributed in the hope that it will be
13 321 mdecorde
// useful, but WITHOUT ANY WARRANTY; without even the implied
14 321 mdecorde
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 321 mdecorde
// PURPOSE. See the GNU General Public License for more
16 321 mdecorde
// details.
17 321 mdecorde
//
18 321 mdecorde
// You should have received a copy of the GNU General
19 321 mdecorde
// Public License along with the TXM platform. If not, see
20 321 mdecorde
// http://www.gnu.org/licenses.
21 321 mdecorde
//
22 321 mdecorde
//
23 321 mdecorde
//
24 321 mdecorde
// $LastChangedDate:$
25 321 mdecorde
// $LastChangedRevision:$
26 321 mdecorde
// $LastChangedBy:$
27 321 mdecorde
//
28 1000 mdecorde
package org.txm.scripts.filters.ProcessEnclitics;
29 321 mdecorde
30 1000 mdecorde
import org.txm.importer.scripts.filters.*;
31 321 mdecorde
import java.util.regex.*;
32 321 mdecorde
import org.txm.tokenizer.TokenizerClasses;
33 321 mdecorde
34 321 mdecorde
// TODO: Auto-generated Javadoc
35 321 mdecorde
/**
 * Line filter that detaches a dash-joined enclitic from the word it is attached to.
 *
 * A line consisting of a single {@code <w ...>stem-enclitic</w>} element is rewritten
 * as two {@code <w>} elements: the first keeps the original opening tag and the stem
 * followed by the dash, the second holds the enclitic alone. Every other line is
 * copied to the output unchanged.
 *
 * The members {@code line}, {@code output} and {@code lineSeparator} are not declared
 * in this class; presumably they are provided by {@code Filter} -- confirm against the
 * base class.
 */
class ProcessEnclitics extends Filter {

        /** Number of lines on which an enclitic was split off since the last {@link #before()}. */
        int counter;

        /**
         * Regular-expression fragment describing the recognised enclitics. It is inserted
         * verbatim into the line pattern, so it is interpreted as regex, not as literal text.
         */
        String enclitics = TokenizerClasses.enclitics;

        /** Compiled line pattern, built lazily by {@link #encliticPattern()}. */
        private Pattern compiledPattern;

        /** Value of {@link #enclitics} that {@link #compiledPattern} was compiled from. */
        private String compiledFor;

        /**
         * Reads the filter parameters.
         *
         * @param args expected to be a Map; its {@code "enclitics"} entry, when present,
         *             replaces the default enclitics expression. When the entry is missing
         *             (or {@code args} is not map-like) the current value is kept and a
         *             usage message is printed on the error stream.
         */
        void SetUsedParam(Object args)
        {
                try
                {
                        def value = args.get("enclitics");
                        if (value != null)
                        {
                                enclitics = value;
                        }
                        else
                        {
                                // Keep the default: assigning null would make the pattern
                                // match the literal text "null" instead of any enclitic.
                                System.err.println("Processenclitics needs 1 Map with arg  :\n enclitics")
                        }
                }
                catch(Exception e)
                {
                        // Best effort: a wrong argument type must not abort the import.
                        System.err.println(e);
                        System.err.println("Processenclitics needs 1 Map with arg  :\n enclitics")
                }
        }

        /**
         * Resets the counter and announces the start of the processing.
         *
         * NOTE(review): the method is declared {@code boolean} but has no explicit
         * {@code return}; the result is whatever Groovy derives from the trailing
         * {@code println} call. Left as is because the expectations of the caller are
         * not visible here -- confirm whether {@code true} should be returned.
         */
        boolean before() {
                counter = 0;
                System.out.println("begin enclitics");
        }

        /**
         * Prints how many enclitics were split off.
         */
        void after()
        {
                print "$counter enclitics with dashes found\n";
        }

        /**
         * Processes the current {@code line}: splits it when it is a single
         * {@code <w>} element ending in {@code -enclitic}, otherwise copies it as is.
         */
        void filter()
        {
                def m = encliticPattern().matcher(line);
                if (m.find())
                {
                        counter++;
                        def openTag = m.group(1);
                        def stem = m.group(2);
                        def clitic = m.group(3);
                        // The dash stays with the first word; the enclitic gets a bare <w> tag.
                        output.write("${openTag}${stem}-</w>\n<w>${clitic}</w>"+lineSeparator);
                }
                else
                {
                        output.write(line+lineSeparator);
                }
        }

        /**
         * Returns the compiled line pattern for the current {@link #enclitics} value.
         * The pattern is recompiled only when that value has changed, instead of once
         * per processed line.
         */
        private Pattern encliticPattern()
        {
                if (compiledPattern == null || compiledFor != enclitics)
                {
                        compiledFor = enclitics;
                        compiledPattern = ~/\A\s*(<w[^>]*>)(.*)-($enclitics)<\/w>\Z/;
                }
                return compiledPattern;
        }
}