|
1 |
<?xml version="1.0" encoding="UTF-8"?>
|
|
2 |
<text>
|
|
3 |
<!--
  Section "ponctuations": 76 pre-tagged <w> elements (n=1 to 76, ids
  w_test1_1 to w_test1_76), each holding exactly one token.
  Token groups, by n:
    1-15   arithmetic: 2 + 2 = 4 / 4 * 4 = 16 / 1 / 0 = error
    16-22  numbers followed by symbols: 10 % / 10 ° C / 10 €
    23-41  digits around separators: 2 . 5 / 2 , 5 / 2 . 3 . 3 , /
           200 000 , or 2 , 4
    42-49  a dash list: liste : - premier - deuxième - troisième
    50     hyphenated word kept as one token: mot-clé
    51-59  a plain sentence with comma and final period
    60-68  parentheses inside a word (paren ( thè ) ses) and after a
           word (parenthèses ( bis ))
    69-76  slash, hyphen and apostrophe compounds: CDU / CSU-Koalition,
           Bar-le-Duc, le, Mont-d'or, Goutte-d'Or, aujourd'hui
  NOTE(review): presumably this is the expected segmentation for the
  punctuation test input; confirm against the test that reads this file.
-->
<div type="ponctuations">
|
|
4 |
<w id="w_test1_1" n="1">2</w>
|
|
5 |
<w id="w_test1_2" n="2">+</w>
|
|
6 |
<w id="w_test1_3" n="3">2</w>
|
|
7 |
<w id="w_test1_4" n="4">=</w>
|
|
8 |
<w id="w_test1_5" n="5">4</w>
|
|
9 |
<w id="w_test1_6" n="6">4</w>
|
|
10 |
<w id="w_test1_7" n="7">*</w>
|
|
11 |
<w id="w_test1_8" n="8">4</w>
|
|
12 |
<w id="w_test1_9" n="9">=</w>
|
|
13 |
<w id="w_test1_10" n="10">16</w>
|
|
14 |
<w id="w_test1_11" n="11">1</w>
|
|
15 |
<w id="w_test1_12" n="12">/</w>
|
|
16 |
<w id="w_test1_13" n="13">0</w>
|
|
17 |
<w id="w_test1_14" n="14">=</w>
|
|
18 |
<w id="w_test1_15" n="15">error</w>
|
|
19 |
<w id="w_test1_16" n="16">10</w>
|
|
20 |
<w id="w_test1_17" n="17">%</w>
|
|
21 |
<w id="w_test1_18" n="18">10</w>
|
|
22 |
<w id="w_test1_19" n="19">°</w>
|
|
23 |
<w id="w_test1_20" n="20">C</w>
|
|
24 |
<w id="w_test1_21" n="21">10</w>
|
|
25 |
<w id="w_test1_22" n="22">€</w>
|
|
26 |
<w id="w_test1_23" n="23">2</w>
|
|
27 |
<w id="w_test1_24" n="24">.</w>
|
|
28 |
<w id="w_test1_25" n="25">5</w>
|
|
29 |
<w id="w_test1_26" n="26">2</w>
|
|
30 |
<w id="w_test1_27" n="27">,</w>
|
|
31 |
<w id="w_test1_28" n="28">5</w>
|
|
32 |
<w id="w_test1_29" n="29">2</w>
|
|
33 |
<w id="w_test1_30" n="30">.</w>
|
|
34 |
<w id="w_test1_31" n="31">3</w>
|
|
35 |
<w id="w_test1_32" n="32">.</w>
|
|
36 |
<w id="w_test1_33" n="33">3</w>
|
|
37 |
<w id="w_test1_34" n="34">,</w>
|
|
38 |
<w id="w_test1_35" n="35">200</w>
|
|
39 |
<w id="w_test1_36" n="36">000</w>
|
|
40 |
<w id="w_test1_37" n="37">,</w>
|
|
41 |
<w id="w_test1_38" n="38">or</w>
|
|
42 |
<w id="w_test1_39" n="39">2</w>
|
|
43 |
<w id="w_test1_40" n="40">,</w>
|
|
44 |
<w id="w_test1_41" n="41">4</w>
|
|
45 |
<w id="w_test1_42" n="42">liste</w>
|
|
46 |
<w id="w_test1_43" n="43">:</w>
|
|
47 |
<w id="w_test1_44" n="44">-</w>
|
|
48 |
<w id="w_test1_45" n="45">premier</w>
|
|
49 |
<w id="w_test1_46" n="46">-</w>
|
|
50 |
<w id="w_test1_47" n="47">deuxième</w>
|
|
51 |
<w id="w_test1_48" n="48">-</w>
|
|
52 |
<w id="w_test1_49" n="49">troisième</w>
|
|
53 |
<w id="w_test1_50" n="50">mot-clé</w>
|
|
54 |
<w id="w_test1_51" n="51">une</w>
|
|
55 |
<w id="w_test1_52" n="52">phrase</w>
|
|
56 |
<w id="w_test1_53" n="53">,</w>
|
|
57 |
<w id="w_test1_54" n="54">un</w>
|
|
58 |
<w id="w_test1_55" n="55">mot</w>
|
|
59 |
<w id="w_test1_56" n="56">et</w>
|
|
60 |
<w id="w_test1_57" n="57">un</w>
|
|
61 |
<w id="w_test1_58" n="58">point</w>
|
|
62 |
<w id="w_test1_59" n="59">.</w>
|
|
63 |
<w id="w_test1_60" n="60">paren</w>
|
|
64 |
<w id="w_test1_61" n="61">(</w>
|
|
65 |
<w id="w_test1_62" n="62">thè</w>
|
|
66 |
<w id="w_test1_63" n="63">)</w>
|
|
67 |
<w id="w_test1_64" n="64">ses</w>
|
|
68 |
<w id="w_test1_65" n="65">parenthèses</w>
|
|
69 |
<w id="w_test1_66" n="66">(</w>
|
|
70 |
<w id="w_test1_67" n="67">bis</w>
|
|
71 |
<w id="w_test1_68" n="68">)</w>
|
|
72 |
<w id="w_test1_69" n="69">CDU</w>
|
|
73 |
<w id="w_test1_70" n="70">/</w>
|
|
74 |
<w id="w_test1_71" n="71">CSU-Koalition</w>
|
|
75 |
<w id="w_test1_72" n="72">Bar-le-Duc</w>
|
|
76 |
<w id="w_test1_73" n="73">le</w>
|
|
77 |
<w id="w_test1_74" n="74">Mont-d'or</w>
|
|
78 |
<w id="w_test1_75" n="75">Goutte-d'Or</w>
|
|
79 |
<w id="w_test1_76" n="76">aujourd'hui</w>
|
|
80 |
</div>
|
|
81 |
<!--
  Section "pretagged": two <w> elements that continue the numbering of
  the previous section (n=77 and n=78).
    n=77  a single <w> whose text, "one word", contains a space.
    n=78  a single <w> with no direct word text; its content is two
          child elements, <a> ("and") and <b> ("another").
  In this listing the text of each element starts on the line after its
  opening tag.
  NOTE(review): presumably checks that content already wrapped in <w> is
  kept as one token rather than split again; confirm against the test.
-->
<div type="pretagged">
|
|
82 |
<w id="w_test1_77" n="77">
|
|
83 |
one word</w>
|
|
84 |
|
|
85 |
<w id="w_test1_78" n="78">
|
|
86 |
<a>
|
|
87 |
and</a>
|
|
88 |
<b>
|
|
89 |
another</b>
|
|
90 |
</w>
|
|
91 |
|
|
92 |
</div>
|
|
93 |
|
|
94 |
|
|
95 |
<!--
  Section "clitics": raw text with no <w> markup.
  One French line ("C'est la fin.") and one English line ("It's the
  end. I've done so much."), each containing apostrophe contractions
  (C', 's, 've).
  NOTE(review): presumably the expected split of these contractions
  depends on the language configured for the tokenizer; confirm against
  the test that consumes this file.
-->
<div type="clitics">
|
|
96 |
|
|
97 |
|
|
98 |
C'est la fin.
|
|
99 |
|
|
100 |
It's the end. I've done so much.
|
|
101 |
|
|
102 |
</div>
|
|
103 |
|
|
104 |
|
|
105 |
<!--
  Section "planes": mixed content.
    - the running text "une note" is immediately followed by a <note>
      element containing "la note";
    - an <outsideToEdit> element follows, whose own (French) text states
      that it is not going to be tokenized.
  NOTE(review): presumably <note> and <outsideToEdit> are element names
  the tokenizer is configured to treat differently from the main text;
  that configuration is not visible in this file, so confirm it there.
-->
<div type="planes">
|
|
106 |
|
|
107 |
une note<note>
|
|
108 |
la note</note>
|
|
109 |
<outsideToEdit>
|
|
110 |
ce texte ne va pas être tokenizé</outsideToEdit>
|
|
111 |
</div>
|
|
112 |
</text>
|
0 |
113 |
|