Login

Soft hyphenation filters

Author:
Xowap
Posted:
October 19, 2017
Language:
Python
Version:
Not specified
Tags:
filter hyphenation
Score:
0 (after 0 ratings)

Django template filters that automatically insert soft hyphens into raw text or into the text parts of HTML code.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
from __future__ import unicode_literals

import re
from html import unescape, escape, entities
from django.template.library import Library
from django.utils.safestring import SafeString
from django.conf import settings
from hyphen import hyphenator, dictools

# Template library instance; the filters below are registered on it through
# the `@register.filter` decorator.
register = Library()

# Matches a run of characters that are not `<` (group 1), optionally followed
# by a single `<...>` tag (group 2). Group 2 is optional, so it is `None` when
# the matched text is not followed by a tag.
OUTSIDE_TAGS_RE = re.compile(r'([^<]*)(<[^>]*>)?', re.MULTILINE)
# Matches runs of at least five word characters; shorter words are never
# candidates for hyphenation.
WORD_RE = re.compile(r'\w{5,}')
# The character that the HTML5 entity table maps `&shy;` to (the soft hyphen).
SHY = entities.html5['shy;']


@register.filter
def hyphen(txt, arg=None):
    """
    Hyphenate a text by automatically adding soft hyphen (`&shy;`) characters
    inside the words. This will prompt the browser to add a hyphen at those
    locations if needed.

    Only words matched by `WORD_RE` are processed; everything else is left
    untouched. If the hyphenation dictionary for the target language is not
    installed yet, it is installed through `dictools.install()` before
    hyphenating.

    Usage: `<h1>{{ page.title | hyphen }}</h1>`

    Inspired by https://djangosnippets.org/snippets/1446/

    :param txt: string to hyphenate
    :param arg: target language (in the `fr-fr` form; `fr_FR` and a bare
                `fr` are accepted as well). Defaults to
                `settings.LANGUAGE_CODE` when empty.
    :return: a hyphenated string
    """

    code = arg or settings.LANGUAGE_CODE

    # Accept both `fr-fr` and `fr_FR`: without this an underscore-separated
    # code would not be split and would produce a bogus dictionary name.
    parts = code.replace('_', '-').split('-')
    language = parts[0].lower()
    # When no region is given, reuse the language code as the region
    # (`fr` -> `fr_FR`).
    region = parts[1] if len(parts) > 1 else parts[0]
    lang = language + '_' + region.upper()

    if not dictools.is_installed(lang):
        dictools.install(lang)

    h = hyphenator.Hyphenator(lang)

    def hyphen_word(m):
        word = m.group(0)
        syllables = h.syllables(word)

        # Defensive: if the hyphenator yields no syllables for a word,
        # joining them would give an empty string and drop the word from the
        # output. Keep the word unchanged instead.
        # NOTE(review): whether `syllables()` can return an empty list
        # depends on the PyHyphen version -- confirm.
        if not syllables:
            return word

        return SHY.join(syllables)

    return WORD_RE.sub(hyphen_word, txt)


@register.filter
def hyphen_html(html, arg=None):
    """
    Hyphenates the text inside HTML code using the `hyphen()` filter above.

    The input is split with `OUTSIDE_TAGS_RE` into text runs and `<...>`
    tags. Tags are copied verbatim; each text run is unescaped, hyphenated
    and escaped again.

    Note: every text run located outside of a `<...>` tag is processed, which
    includes the content of elements such as `<script>` or `<style>`.

    Usage: `<div class="content">{{ page.body | hyphen_html }}</div>`

    :param html: HTML code to be hyphenated
    :param arg: language code in the form `fr-fr`.
    :return: a safe string with hyphenated HTML inside
    """

    def hyphen_outside_tags(match):
        text = match.group(1)
        # The tag group is optional in the pattern: it is `None` for text
        # that is not followed by a tag (plain text, trailing text, or the
        # final empty match). Concatenating `None` would raise a TypeError.
        tag = match.group(2) or ''

        # Empty runs (e.g. between two adjacent tags) need no processing.
        if text:
            text = escape(hyphen(unescape(text), arg))

        return text + tag

    out = OUTSIDE_TAGS_RE.sub(hyphen_outside_tags, str(html))

    return SafeString(out)

More like this

Comments

Please login first before commenting.