Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / scripts / filters / ProcessEnclitics / ProcessEnclitics.groovy @ 1000

History | View | Annotate | Download (2.5 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate:$
25
// $LastChangedRevision:$
26
// $LastChangedBy:$ 
27
//
28
package org.txm.scripts.filters.ProcessEnclitics;
29

    
30
import org.txm.importer.scripts.filters.*;
31
import java.util.regex.*;
32
import org.txm.tokenizer.TokenizerClasses;
33

    
34
// TODO: Auto-generated Javadoc
35
/**
 * Line filter that detaches a dash-joined enclitic from the word it follows.
 *
 * Each input line that consists of a single {@code <w ...>stem-enclitic</w>}
 * element, where the enclitic matches the {@link #enclitics} alternation, is
 * rewritten as two elements: the original opening tag with {@code stem-},
 * followed by a new attribute-less {@code <w>} holding the enclitic. Every
 * other line is copied to the output unchanged.
 *
 * The {@code line}, {@code output} and {@code lineSeparator} names used below
 * are not declared in this class; presumably they are inherited from
 * {@code Filter} - confirm against that class.
 */
class ProcessEnclitics extends Filter {

    /** Number of lines that were split since the last call to {@link #before()}. */
    int counter;

    /** Regex alternation of enclitic forms; defaults to {@code TokenizerClasses.enclitics}. */
    String enclitics = TokenizerClasses.enclitics;

    /** Compiled form of the line regex, built lazily by {@link #linePattern()}. */
    private Pattern encliticPattern;

    /** Value of {@link #enclitics} that {@link #encliticPattern} was compiled from. */
    private String compiledFrom;

    /**
     * Reads the optional {@code "enclitics"} entry from the given parameters.
     *
     * @param args expected to offer {@code get("enclitics")} (the error message
     *             below asks for a Map). When the entry is absent or null the
     *             current value is kept, instead of being overwritten with
     *             null as before (which turned the regex into {@code -(null)}).
     * @see org.txm.importer.filters.Filter#SetUsedParam(java.lang.Object)
     */
    void SetUsedParam(Object args)
    {
        try
        {
            def requested = args.get("enclitics");
            if (requested != null)
            {
                enclitics = requested;
            }
        }
        catch(Exception e)
        {
            // Reached e.g. when args is null or has no get(String) method.
            System.err.println(e);
            System.err.println("Processenclitics needs 1 Map with arg  :\n enclitics")
        }
    }

    /**
     * Resets the counter and announces the start of processing.
     *
     * NOTE(review): this method is declared boolean but has no explicit
     * return, so Groovy's implicit return yields the (void) result of the
     * println call - presumably coerced to false. Confirm what Filter does
     * with the returned value and add an explicit return accordingly.
     *
     * @see org.txm.importer.filters.Filter#before()
     */
    boolean before() {
        counter = 0;
        System.out.println("begin enclitics");
    }

    /**
     * Prints how many lines were split.
     *
     * @see org.txm.importer.filters.Filter#after()
     */
    void after()
    {
        print "$counter enclitics with dashes found\n";
    }

    /**
     * Processes the current {@code line}: splits it when it is a single
     * {@code <w>} element ending in a dash plus an enclitic, otherwise copies
     * it through unchanged.
     *
     * @see org.txm.importer.filters.Filter#filter()
     */
    void filter()
    {
        def m = (line =~ linePattern());
        if (m)
        {
            counter++;
            // m[0] is the list [whole match, opening tag, stem, enclitic].
            def groups = m[0];
            def openingTag = groups[1];
            def stem = groups[2];
            def enclitic = groups[3];
            // NOTE(review): the two elements are separated by a literal "\n"
            // rather than lineSeparator; kept as in the original - confirm
            // whether that is intended.
            output.write("$openingTag$stem-</w>\n<w>$enclitic</w>"+lineSeparator);
        }
        else
        {
            output.write(line+lineSeparator);
        }
    }

    /**
     * Returns the compiled line regex, recompiling only when
     * {@link #enclitics} has changed since the previous compilation, so the
     * pattern is no longer rebuilt for every input line.
     */
    private Pattern linePattern()
    {
        if (encliticPattern == null || compiledFrom != enclitics)
        {
            compiledFrom = enclitics;
            String regex = /\A\s*(<w[^>]*>)(.*)-($enclitics)<\/w>\Z/;
            encliticPattern = Pattern.compile(regex);
        }
        return encliticPattern;
    }
}