root / tmp / org.txm.core / src / java / org / txm / scripts / importer / ReunitShortLines.groovy @ 2473
History | View | Annotate | Download (2.3 kB)
1 |
// Copyright © 2010-2013 ENS de Lyon.
|
---|---|
2 |
// Copyright © 2007-2010 ENS de Lyon, CNRS, INRP, University of
|
3 |
// Lyon 2, University of Franche-Comté, University of Nice
|
4 |
// Sophia Antipolis, University of Paris 3.
|
5 |
//
|
6 |
// The TXM platform is free software: you can redistribute it
|
7 |
// and/or modify it under the terms of the GNU General Public
|
8 |
// License as published by the Free Software Foundation,
|
9 |
// either version 2 of the License, or (at your option) any
|
10 |
// later version.
|
11 |
//
|
12 |
// The TXM platform is distributed in the hope that it will be
|
13 |
// useful, but WITHOUT ANY WARRANTY; without even the implied
|
14 |
// warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
|
15 |
// PURPOSE. See the GNU General Public License for more
|
16 |
// details.
|
17 |
//
|
18 |
// You should have received a copy of the GNU General
|
19 |
// Public License along with the TXM platform. If not, see
|
20 |
// http://www.gnu.org/licenses.
|
21 |
//
|
22 |
//
|
23 |
//
|
24 |
// $LastChangedDate:$
|
25 |
// $LastChangedRevision:$
|
26 |
// $LastChangedBy:$
|
27 |
//
|
28 |
package org.txm.scripts.importer
|
29 |
|
30 |
import java.io.File; |
31 |
|
32 |
// TODO: Auto-generated Javadoc
|
33 |
/**
 * Glues runs of short lines of a text file back together.
 *
 * The input file is read line by line and the result is written to a file
 * named {@code reu.txt} in the same directory as the input. The input file
 * itself is never modified. All the work is done by the constructor.
 *
 * A line is considered "long" when its length is strictly greater than
 * {@code maxsize}; any other non-empty line is "short". The rules are:
 * <ul>
 * <li>an empty line is written as a single line break;</li>
 * <li>a line starting with {@code "**** "} is always started on a new line;</li>
 * <li>a long line following a long line (or an empty/header line) is started
 * on a new line;</li>
 * <li>a short line following a long line (or an empty/header line) is
 * written and then terminated by a line break;</li>
 * <li>any line following a short line is appended to the current output
 * line without any separator.</li>
 * </ul>
 */
class ReunitShortLines {

	/**
	 * Processes {@code file} and writes the result to {@code reu.txt} next to it.
	 *
	 * @param file the text file to read
	 * @param maxsize length threshold: a line is "long" when its length is
	 *        strictly greater than this value
	 * @param encoding name of the charset used both to read {@code file} and
	 *        to write {@code reu.txt}
	 * @throws IOException if the input cannot be read or the output cannot be
	 *         written (including an unsupported encoding name)
	 */
	ReunitShortLines(File file, int maxsize, String encoding)
	{
		// getParentFile() is null for a bare relative name such as "a.txt";
		// going through the absolute path always yields a usable directory.
		File temp = new File(file.getAbsoluteFile().getParentFile(), "reu.txt");

		// BufferedReader provides the JDK readLine(), which treats "\n", "\r"
		// and "\r\n" each as ONE line terminator.
		BufferedReader input = new BufferedReader(new InputStreamReader(new FileInputStream(file), encoding));
		try
		{
			Writer writer = new OutputStreamWriter(new FileOutputStream(temp), encoding);
			try
			{
				// oklength: was the last processed line "long"? Empty lines and
				// "**** " headers reset it to true. It starts at true so the
				// first line is handled as if it followed a long one.
				boolean oklength = true;
				boolean previousok = false;

				String line = input.readLine();
				while (line != null)
				{
					if (line.length() == 0)
					{
						writer.write("\n");
						oklength = true;
					}
					else if (line.startsWith("**** "))
					{
						// headers always begin on a fresh output line
						writer.write("\n" + line);
						oklength = true;
						// NOTE(review): a short line that directly follows a
						// header is appended to the header line itself (no line
						// break is written in between) - confirm this is intended.
					}
					else
					{
						previousok = oklength;
						oklength = line.length() > maxsize;

						// long after long: start a new output line
						if (previousok && oklength)
							writer.write("\n");

						writer.write(line);

						// short after long: close the output line
						if (previousok && !oklength)
							writer.write("\n");
					}
					line = input.readLine();
				}
			}
			finally
			{
				// close() flushes the encoder buffer; without it the output
				// file can stay empty or truncated.
				writer.close();
			}
		}
		finally
		{
			input.close();
		}
	}

	/**
	 * Command-line entry point.
	 *
	 * @param args optional arguments: {@code [file [maxsize [encoding]]]}.
	 *        Missing arguments fall back to the historical defaults
	 *        ({@code /home/mdecorde/xml/voeux/Voeux.txt}, {@code 30},
	 *        {@code cp1252}).
	 */
	public static void main(String[] args)
	{
		File file = new File(args.length > 0 ? args[0] : "/home/mdecorde/xml/voeux/Voeux.txt");
		int maxsize = args.length > 1 ? Integer.parseInt(args[1]) : 30;
		String encoding = args.length > 2 ? args[2] : "cp1252";

		new ReunitShortLines(file, maxsize, encoding);
	}
}