from __future__ import unicode_literals
import re
from html import unescape, escape, entities
from django.template.library import Library
from django.utils.safestring import SafeString
from django.conf import settings
from hyphen import hyphenator, dictools
# Template library instance on which the filters below are registered.
register = Library()
# Matches a run of text outside any tag (group 1), optionally followed by a
# single tag (group 2). Group 2 is unset (None) when no tag follows the text.
# The pattern has no ^/$ anchors, so re.MULTILINE does not affect matching.
OUTSIDE_TAGS_RE = re.compile(r'([^<]*)(<[^>]*>)?', re.MULTILINE)
# Only runs of five or more word characters are candidates for hyphenation.
WORD_RE = re.compile(r'\w{5,}')
# The soft hyphen character (U+00AD), looked up in the HTML5 entity table.
SHY = entities.html5['shy;']
def _hyphen_locale(code):
    """
    Convert a language code into the locale name passed to the hyphenator.

    `fr-fr`, `fr-FR` and `fr_FR` all become `fr_FR`; a bare language such as
    `fr` has no region, so the language itself is reused (`fr_FR`).

    :param code: language code, with or without a region part
    :return: locale name in the `ll_CC` form
    """
    # Accept both the `fr-fr` (Django) and the `fr_FR` (locale) spellings.
    parts = code.replace('_', '-').split('-')
    region = parts[1] if len(parts) > 1 else parts[0]
    return parts[0].lower() + '_' + region.upper()


@register.filter
def hyphen(txt, arg=None):
    """
    Hyphenate a text by automatically adding soft hyphen characters (U+00AD,
    `&shy;` in HTML) inside the words. This will prompt the browser to add a
    hyphen at those locations if needed.

    Only words matched by `WORD_RE` are processed; everything else is left
    as it is.

    Usage: `{{ page.title | hyphen }}`

    Inspired by https://djangosnippets.org/snippets/1446/

    :param txt: string to hyphenate
    :param arg: target language (in the `fr-fr` form); defaults to
        `settings.LANGUAGE_CODE` when empty
    :return: a hyphenated string
    """
    lang = _hyphen_locale(arg or settings.LANGUAGE_CODE)
    word_hyphenator = None

    def hyphen_word(match):
        nonlocal word_hyphenator
        # Load (and if necessary install) the dictionary only once a word
        # actually needs it, so texts without any candidate word cost nothing.
        if word_hyphenator is None:
            if not dictools.is_installed(lang):
                dictools.install(lang)
            word_hyphenator = hyphenator.Hyphenator(lang)
        word = match.group(0)
        # Defensive: if the hyphenator yields no syllables, keep the word
        # unchanged rather than replacing it with an empty string.
        return SHY.join(word_hyphenator.syllables(word)) or word

    return WORD_RE.sub(hyphen_word, txt)
@register.filter
def hyphen_html(html, arg=None):
    """
    Hyphenate the text inside HTML code using the `hyphen()` filter.

    Text found outside tags is unescaped, hyphenated and escaped again; the
    tags themselves are copied through unchanged. The result is marked safe,
    so the tags in `html` are emitted as they were given.

    Usage: `{{ page.body | hyphen_html }}`

    :param html: HTML code to be hyphenated
    :param arg: language code in the form `fr-fr`.
    :return: a safe string with hyphenated HTML inside
    """
    def hyphen_outside_tags(match):
        text = match.group(1)
        # The tag group is optional: it is None for text that is not followed
        # by a tag and for the empty match at the end of the input. Adding
        # None to a string would raise TypeError.
        tag = match.group(2) or ''
        # Empty segments (e.g. between two adjacent tags) need no processing.
        if text:
            text = escape(hyphen(unescape(text), arg))
        return text + tag

    out = OUTSIDE_TAGS_RE.sub(hyphen_outside_tags, str(html))
    return SafeString(out)