Statistics
| Revision:

root / tmp / org.txm.groovy.core / src / groovy / org / txm / importer / Pager.groovy @ 479

History | View | Annotate | Download (3.3 kB)

1
// Copyright © 2010-2013 ENS de Lyon.
2
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
3
// Lyon 2, University of Franche-Comté, University of Nice
4
// Sophia Antipolis, University of Paris 3.
5
// 
6
// The TXM platform is free software: you can redistribute it
7
// and/or modify it under the terms of the GNU General Public
8
// License as published by the Free Software Foundation,
9
// either version 2 of the License, or (at your option) any
10
// later version.
11
// 
12
// The TXM platform is distributed in the hope that it will be
13
// useful, but WITHOUT ANY WARRANTY; without even the implied
14
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15
// PURPOSE. See the GNU General Public License for more
16
// details.
17
// 
18
// You should have received a copy of the GNU General
19
// Public License along with the TXM platform. If not, see
20
// http://www.gnu.org/licenses.
21
// 
22
// 
23
// 
24
// $LastChangedDate: 2013-05-06 17:38:43 +0200 (lun. 06 mai 2013) $
25
// $LastChangedRevision: 2386 $
26
// $LastChangedBy: mdecorde $ 
27
//
28
package org.txm.importer
29
import java.util.regex.Pattern
30
// TODO: Auto-generated Javadoc
31

    
32
/**
 * Prototype of a generic pager.
 *
 * Holds the configuration of a paging pass: which tag is counted to decide
 * where to cut, named XPath expressions, and the tag names belonging to each
 * tag class. The paging itself ({@link #run(File)}) is not implemented yet.
 */
class Pager {

    // ---- cut infos ----

    /** Name of the tag to count for cutting; defaults to "w". */
    String cuttag = "w"

    /** Attribute of {@link #cuttag} to test; null until set through {@link #setCutInfos}. */
    String cutattr

    /** Compiled regex for the attribute value; null until a valid regex is given. */
    Pattern cutvaluereg

    /** Number of {@link #cuttag} elements before cutting; defaults to 500. */
    int cutnumber = 500

    // ---- xpaths ----

    /** XPath expressions indexed by name. */
    HashMap<String, String> xpaths = new HashMap<String, String>()

    // ---- tag classes ----

    /** The ptags. */
    public List<String> ptags = ["p", "lg"]

    /** The linetags. */
    public List<String> linetags = ["br", "lb", "l"]

    /** The dbllinetags. */
    public List<String> dbllinetags = []

    /** The commenttags. */
    public List<String> commenttags = ["note"]

    /** The corrtags. */
    public List<String> corrtags = ["corr"]

    /** The regtags. */
    public List<String> regtags = ["reg"]

    /** The foreigntags. */
    public List<String> foreigntags = ["foreign"]

    /**
     * Sets the cut infos. Each argument is validated on its own; an invalid
     * argument leaves the corresponding current value untouched.
     *
     * @param tag the tag to count; ignored when null or empty
     * @param attr the attribute of the tag to test; ignored when null or empty
     * @param valuereg the regex source for the attribute value; ignored when
     *        null, and reported on stderr when its syntax is invalid
     * @param number the number of tags before cutting; ignored unless positive
     */
    public void setCutInfos(String tag, String attr, String valuereg, int number)
    {
        if (tag != null && tag.length() > 0) {
            cuttag = tag
        }

        // A null regex means "not provided"; it is not a syntax error.
        if (valuereg != null) {
            try {
                cutvaluereg = Pattern.compile(valuereg)
            } catch (java.util.regex.PatternSyntaxException e) {
                System.err.println("Pager: setCutInfos: Wrong regex syntax")
            }
        }

        // Validate the incoming parameter, not the current field value:
        // the field starts as null, so testing it would never let attr through.
        if (attr != null && attr.length() > 0) {
            cutattr = attr
        }

        // Same here: the guard must apply to the new value.
        if (number > 0) {
            cutnumber = number
        }
    }

    /**
     * Replaces the whole XPath map.
     *
     * @param xpaths the new map; ignored when null
     */
    public void setXPaths(HashMap<String, String> xpaths)
    {
        if (xpaths != null) {
            this.xpaths = xpaths
        }
    }

    /**
     * Registers one XPath under a name.
     *
     * @param name the name; the call is ignored when null
     * @param xpath the xpath; the call is ignored when null
     */
    public void addXPath(String name, String xpath)
    {
        if (xpath != null && name != null) {
            xpaths.put(name, xpath)
        }
    }

    /**
     * Run.
     *
     * @param infile the infile
     * @return true, if successful; currently always false because the
     *         paging is not implemented
     */
    public boolean run(File infile)
    {
        // Not implemented yet: make the previously implicit result explicit.
        return false
    }

    /**
     * The main method: builds a Pager and configures its cut infos.
     *
     * @param args the arguments (unused)
     */
    public static void main(String[] args)
    {
        Pager p = new Pager()
        p.setCutInfos("w", "type", "{[|]@^[|`[{`]@^pon.*", 600)
    }
}