# XML/HTML entities
# Rule syntax used in this file: a regular expression on the left of the
# arrow, the replacement on the right; \1, \2 ... refer to capture groups.
# Literal angle brackets in the input are rewritten to entities, presumably so
# that the only "<" and ">" left in the text belong to the markup that the
# following rules insert.
# NOTE(review): a bare "&" is not rewritten to "&amp;" here - confirm that the
# input is expected to arrive with ampersands already escaped.
\< ==> &lt;
\> ==> &gt;

# Punctuation, paragraphs, spaces
# Wrap each single non-whitespace character from the listed set in <c>...</c>.
# The set covers U+0000-U+001F, ASCII punctuation and symbols, U+007F-U+009F,
# selected Latin-1 symbols, U+0250-U+1DFF, U+1F00-U+FFFF and a few code points
# in the U+1E9C-U+1EFF area. The trailing "&&[^\s]" looks like Java-style
# character-class intersection that removes whitespace from the set
# (TODO confirm the regex dialect of the rule engine).
([[\u0000-\u001F\u0250-\u1DFF\u1F00-\uFFFF\u0021\u0022\u0023\u0024\u0025\u0026\u0027\u0028\u0029\u002A\u002B\u002C\u002D\u002E\u002F\u003A\u003B\u003C\u003D\u003E\u003F\u0040\u005B\u005C\u005D\u005E\u005F\u0060\u007B\u007C\u007D\u007E\u007F\u0080\u0081\u0082\u0083\u0084\u0085\u0086\u0087\u0088\u0089\u008A\u008B\u008C\u008D\u008E\u008F\u0090\u0091\u0092\u0093\u0094\u0095\u0096\u0097\u0098\u0099\u009A\u009B\u009C\u009D\u009E\u009F\u00A1\u00A2\u00A3\u00A4\u00A5\u00A6\u00A7\u00A8\u00A9\u00AB\u00AC\u00AD\u00AE\u00AF\u00B0\u00B1\u00B4\u00B6\u00B7\u00B8\u00BB\u00BF\u00D7\u00F7\u1E9C\u1E9D\u1E9E\u1E9F\u1EFA\u1EFB\u1EFC\u1EFD\u1EFE\u1EFF]&&[^\s]]) ==> <c>\1</c>
# The rule above also wraps the "&" and ";" of the entities amp, lt and gt
# separately; put each such entity back together as one <c> element.
<c>&</c>((amp)|(lt)|(gt))<c>;</c> ==> <c>&\1;</c>
# Insert a paragraph start tag at "^" and a paragraph end tag at "$".
# NOTE(review): whether these anchors match per line or once per input depends
# on the regex flags used by the rule engine - confirm.
^ ==> <p>
$ ==> </p>
# Every whitespace character becomes an <S/> marker.
\s ==> <S/>

# Words
# Close a word before, and open a word after, each marker produced so far, so
# that the text between markers ends up inside <w>...</w>. This also produces
# empty <w></w> pairs next to adjacent markers; the Cleanup rules remove them.
# A space marker ends the current word and starts the next one.
<S/> ==> </w><S/><w>
# A punctuation element ends the current word ...
<c> ==> </w><c>
# ... and a new word starts right after it.
</c> ==> </c><w>
# A paragraph starts with an open word and ends by closing the last word.
# NOTE(review): the two rules below have trailing tab characters after the
# replacement; presumably the rule loader trims them - confirm.
<p> ==> <p><w>			
</p> ==> </w></p>	

# Cleanup
# Remove the "</w><S/><w>" sequence that directly follows a paragraph end
# (optionally after newlines); the captured newlines are kept.
</p>(\n*)</w><S/><w> ==> </p>\1
# Delete empty word elements (the replacement is intentionally empty).
<w></w> ==> 

# E-mail (case-insensitive regex matching is denoted with '-->')
# Shape of the pattern, using the abbreviations defined below:
# (TB.)*TB@(TB.)*TB.TLD
# TB: ((<w>[\p{L}0-9]+</w>)|(<c>[_-]</c>))+
# TLD: <w>[\p{L}]{2,6}</w>
# TB is one or more word elements of letters/digits or punctuation elements
# holding "_" or "-"; TLD is a word element of 2 to 6 letters.
# The whole matched token sequence is replaced by a single <w> element.
# NOTE(review): $txt is presumably the matched text with the <w>/<c> markup
# removed - confirm against the rule engine.
(((<w>[\p{L}0-9]+</w>)|(<c>[_-]</c>))+<c>\.</c>)*((<w>[\p{L}0-9]+</w>)|(<c>[_-]</c>))+<c>@</c>(((<w>[\p{L}0-9]+</w>)|(<c>[_-]</c>))+<c>\.</c>)*((<w>[\p{L}0-9]+</w>)|(<c>[_-]</c>))+<c>\.</c><w>[\p{L}]{2,6}</w> --> <w>$txt</w>

# URL
# Case-insensitive rule. The pattern consists of, in order:
#   - an optional scheme (ftp, http or https) followed by "://";
#   - zero or more dot-terminated parts made of letter/digit words or the
#     punctuation characters ~ _ ! * ' -;
#   - one word of 1 to 63 characters that starts and ends with a letter or
#     digit, a dot, and a word of 2 to 6 letters;
#   - zero or more path segments, each starting with "/"; the last element of
#     a segment may not be one of ! ( ) . ; ? : , so such a character at the
#     end of a segment is left outside the match;
#   - an optional trailing "/".
# The whole matched token sequence is replaced by a single <w> element.
# NOTE(review): $txt is presumably the matched text with the <w>/<c> markup
# removed - confirm against the rule engine.
(<w>((ftp)|(https?))</w><c>:</c><c>/</c><c>/</c>)?(((<w>[0-9\p{L}]+</w>)|(<c>[~_!*'-]</c>))+<c>\.</c>)*<w>([0-9\p{L}][0-9\p{L}-]{0,61})?[0-9\p{L}]</w><c>\.</c><w>[\p{L}]{2,6}</w>(<c>/</c>((<w>[0-9\p{L}]+</w>)|(<c>[~_!*'().;?:@=+$,%#-]</c>)|(<c>&</c>))*((<w>[0-9\p{L}]+</w>)|(<c>[~_*'@=+$%#-]</c>)|(<c>&</c>)))*(<c>/</c>)? --> <w>$txt</w>

# Brioni rule #1.2
# Inside a word element, a period that is directly followed by an uppercase
# letter is split off: the text before the period is closed as its own word,
# the period becomes a <c> element, and a sentence end/start pair is inserted
# before a new word that begins with the uppercase letter.
# NOTE(review): a period inside <w> can presumably only come from tokens that
# the e-mail/URL rules merged (e.g. "end.Start" taken for a host name) - confirm.
<w>([^<.]+)\.(\p{Lu}) ==> <w>\1</w><c>.</c></s><s><w>\2

# Suffixes (incl. Brioni rule #2.6)
# A word, followed by a hyphen, en dash or em dash, followed by a word that is
# exactly one of the listed endings, is merged into a single word element that
# keeps the dash (e.g. a name or abbreviation with an attached ending).
# NOTE(review): the endings look like Slovenian inflectional/derivational
# suffixes - confirm with the rule author before extending the list.
<w>([^<]+)</w><c>([-–—])</c><w>((a)|(evemu)|(evskega)|(i)|(jevega)|(jevska)|(jevskimi)|(jinemu)|(oma)|(ovim)|(ovski)|(e)|(evi)|(evskem)|(ih)|(jevem)|(jevske)|(jevsko)|(jini)|(ov)|(ovima)|(ovskih)|(em)|(evih)|(evskemu)|(ja)|(jevemu)|(jevskega)|(ji)|(jinih)|(ova)|(ovimi)|(ovskim)|(ema)|(evim)|(evski)|(je)|(jevi)|(jevskem)|(jih)|(jinim)|(ove)|(ovo)|(ovskima)|(ev)|(evima)|(evskih)|(jem)|(jevih)|(jevskemu)|(jin)|(jinima)|(ovega)|(ovska)|(ovskimi)|(eva)|(evimi)|(evskim)|(jema)|(jevim)|(jevski)|(jina)|(jinimi)|(ovem)|(ovske)|(ovsko)|(eve)|(evo)|(evskima)|(jev)|(jevima)|(jevskih)|(jine)|(jino)|(ovemu)|(ovskega)|(u)|(evega)|(evska)|(evskimi)|(jeva)|(jevimi)|(jevskim)|(jinega)|(ju)|(ovi)|(ovskem)|(evem)|(evske)|(evsko)|(jeve)|(jevo)|(jevskima)|(jinem)|(om)|(ovih)|(ovskemu)|(ovec)|(ovca)|(ovcu)|(ovcem)|(ovcev)|(ovcema)|(ovcih)|(ovci)|(ovce)|(ovcimi)|(evec)|(evca)|(evcu)|(evcem)|(evcev)|(evcema)|(evcih)|(evci)|(evce)|(evcimi)|(jevec)|(jevca)|(jevcu)|(jevcem)|(jevcev)|(jevcema)|(jevcih)|(jevci)|(jevce)|(jevcimi)|(ovka)|(ovke)|(ovki)|(ovko)|(ovk)|(ovkama)|(ovkah)|(ovkam)|(ovkami)|(evka)|(evke)|(evki)|(evko)|(evk)|(evkama)|(evkah)|(evkam)|(evkami)|(jevka)|(jevke)|(jevki)|(jevko)|(jevk)|(jevkama)|(jevkah)|(jevkam)|(jevkami))</w> ==> <w>\1\2\3</w>

# Same merge for a number followed by a dash and one of the listed endings.
<w>(\d+)</w><c>([-–—])</c><w>((timi)|(im)|(ima)|(a)|(imi)|(e)|(o)|(ega)|(ti)|(em)|(tih)|(emu)|(tim)|(i)|(tima)|(ih)|(ta)|(te)|(to)|(tega)|(tem)|(temu))</w> ==> <w>\1\2\3</w>

# An apostrophe standing directly between two words joins them: the closing
# tag, the <c> element and the opening tag are replaced by a bare apostrophe.
</w><c>'</c><w> ==> '

# One or more groups of digits, each followed by ".", "," or ":", and a final
# group of digits with an optional letter tail are merged into a single word.
# NOTE(review): $txt is presumably the matched text with the <w>/<c> markup
# removed - confirm against the rule engine.
(<w>\d+</w><c>[.,:]</c>)+<w>\d+\p{L}*</w> ==> <w>$txt</w>

#
# End of Part 1
#
