Login

Translation statistics gatherer

Author:
ramiro
Posted:
August 17, 2008
Language:
Python
Version:
.96
Tags:
internationalization i18n l10n translations status statistics localization
Score:
0 (after 0 ratings)

A script that gathers statistics of translated, untranslated and fuzzy literals of translations (be it Django itself or a project using Django).

To do that, it re-scans the tree and generates an up-to-date POT file in a temporary location, so the translation "coverage" statistics are calculated relative to the current state of the tree. It doesn't touch the tree it is analyzing at all.

It should be run from the directory containing the locale/ directory of your project or from the django/ directory of a Django copy.

It is based on the makemessages Django management command (or rather its previous standalone make-messages.py script incarnation) and uses the same command line switches:

  • -d <domain> -- <domain> is django or djangojs. Optional, defaults to django.
  • -l <language> -- process only <language>, OR
  • -a -- process all languages
  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
#!/usr/bin/env python

# Need to ensure that the i18n framework is enabled
from django.conf import settings
settings.configure(USE_I18N = True)

from django.utils.translation import templatize
import re
import os
import sys
import getopt
from itertools import dropwhile
from tempfile import mkdtemp
from shutil import rmtree

# Matches a newline, optional whitespace and a ``//`` comment opener.
# calculate_stats() replaces each match with '\n#' in .js sources before
# handing the result to xgettext.
pythonize_re = re.compile(r'\n\s*//')
# Parses the summary that calculate_stats() reads from the stderr of
# ``msgfmt --statistics``. Each of the three parts is optional; the counts are
# exposed as the named groups ``transl``, ``fuzzy`` and ``untransl``.
msgfmt_re = re.compile(r'((?P<transl>\d+) translated messages?)?((, )?(?P<fuzzy>\d+) fuzzy translations?)?((, )?(?P<untransl>\d+) untranslated messages?)?\.')

def _read_file(path, mode="rb"):
    """Return the whole content of *path*, closing the file afterwards."""
    handle = open(path, mode)
    try:
        return handle.read()
    finally:
        handle.close()


def _write_file(path, data, mode="wb"):
    """Write *data* to *path* opened with *mode*, closing the file afterwards."""
    handle = open(path, mode)
    try:
        handle.write(data)
    finally:
        handle.close()


def _run_command(cmd, mode='t'):
    """Run *cmd* with os.popen3 and return what it wrote as ``(stdout, stderr)``."""
    (stdin, stdout, stderr) = os.popen3(cmd, mode)
    try:
        output = stdout.read()
        errors = stderr.read()
    finally:
        # Close all three pipes explicitly instead of relying on garbage
        # collection.
        stdin.close()
        stdout.close()
        stderr.close()
    return output, errors


def _abort(message, errors):
    """Write *message* and the tool's *errors* to stderr, then exit with status 8."""
    sys.stderr.write(message)
    sys.stderr.write(errors)
    sys.exit(8)


def _print_usage():
    """Print the command line synopsis."""
    print("usage: l10n-stats.py -l <language>")
    print("   or: l10n-stats.py -a")


def _mirror_dir(workdir, dirpath):
    """Create (if needed) and return the directory mirroring *dirpath* under *workdir*."""
    litdir = os.path.join(workdir, dirpath)
    if not os.path.isdir(litdir):
        os.makedirs(litdir)
    return litdir


def _build_pot(workdir, potfile, domain, verbose):
    """Walk the current directory and append every extracted message to *potfile*.

    For the ``djangojs`` domain every ``.js`` file is processed; for the
    ``django`` domain every ``.py`` and ``.html`` file is.  Files that have to
    be converted before xgettext can read them are written below *workdir*,
    never next to the original.  Exits with status 8 if xgettext writes
    anything to stderr.
    """
    for (dirpath, dirnames, filenames) in os.walk("."):
        for filename in filenames:
            if domain == 'djangojs' and filename.endswith('.js'):
                if verbose:
                    sys.stdout.write('processing file %s in %s\n' % (filename, dirpath))
                data = _read_file(os.path.join(dirpath, filename))
                data = pythonize_re.sub('\n#', data)
                # The converted copy goes into the work directory so the
                # scanned tree is left untouched and nothing is left behind
                # when xgettext fails.
                litfile = os.path.join(_mirror_dir(workdir, dirpath), '%s.py' % filename)
                _write_file(litfile, data)
                # Only the first chunk written to the pot file keeps its header.
                if os.path.exists(potfile):
                    header_opt = '--omit-header'
                else:
                    header_opt = ''
                cmd = 'xgettext %s -d %s -L Perl --keyword=gettext_noop --keyword=gettext_lazy --keyword=ngettext_lazy:1,2 --from-code UTF-8 -o - "%s"' % (
                    header_opt, domain, litfile)
                msgs, errors = _run_command(cmd)
                if errors:
                    _abort('errors happened while running xgettext on %s\n' % filename, errors)
                if msgs:
                    _write_file(potfile, msgs, 'ab')
            elif domain == 'django' and (filename.endswith('.py') or filename.endswith('.html')):
                litfile = os.path.join(dirpath, filename)
                if filename.endswith('.html'):
                    # Templates are converted with templatize() and the result
                    # is stored in the work directory.
                    data = _read_file(litfile)
                    litfile = os.path.join(_mirror_dir(workdir, dirpath), '%s.py' % filename)
                    _write_file(litfile, templatize(data))
                if verbose:
                    sys.stdout.write('processing file %s in %s\n' % (filename, dirpath))
                cmd = 'xgettext -d %s -L Python --keyword=gettext_noop --keyword=gettext_lazy --keyword=ngettext_lazy:1,2 --keyword=ugettext_noop --keyword=ugettext_lazy --keyword=ungettext_lazy:1,2 --from-code UTF-8 -o - "%s"' % (
                    domain, litfile)
                msgs, errors = _run_command(cmd)
                if errors:
                    _abort('errors happened while running xgettext on %s\n' % filename, errors)
                if os.path.exists(potfile):
                    # Strip the header: drop everything up to the first empty line.
                    msgs = '\n'.join(dropwhile(len, msgs.split('\n')))
                else:
                    msgs = msgs.replace('charset=CHARSET', 'charset=UTF-8')
                if msgs:
                    _write_file(potfile, msgs, 'ab')


def _report_language(lang, localedir, workdir, potfile, pot_msgs, domain):
    """Print the translated/fuzzy/untranslated percentages for *lang*.

    The language's existing ``.po`` file is merged with *potfile* into a copy
    below *workdir* (or the bare *pot_msgs* are used when there is no ``.po``
    file yet) and ``msgfmt --statistics`` is run on that copy.  Languages
    without an ``LC_MESSAGES`` directory are skipped silently.
    """
    basedir = os.path.join(localedir, lang, 'LC_MESSAGES')
    if not os.path.isdir(basedir):
        return

    dstdir = os.path.join(workdir, basedir)
    if not os.path.isdir(dstdir):
        os.makedirs(dstdir)

    pofile = os.path.join(basedir, '%s.po' % domain)
    dstfile = os.path.join(dstdir, '%s.po' % domain)
    if os.path.exists(pofile):
        msgs, errors = _run_command('msgmerge -q "%s" "%s"' % (pofile, potfile), 'b')
        if errors:
            _abort('errors happened while running msgmerge\n', errors)
        _write_file(dstfile, msgs)
    else:
        _write_file(dstfile, pot_msgs)

    # The statistics are read from stderr; LC_ALL=C keeps the wording that
    # msgfmt_re expects.
    dummy, data = _run_command('LC_ALL=C msgfmt --statistics -o - "%s"' % dstfile, 't')
    mo = msgfmt_re.match(data)
    if not mo:
        return

    # Parts missing from the msgfmt summary count as zero.
    groups = mo.groupdict('0')
    transl = int(groups['transl'])
    fuzzy = int(groups['fuzzy'])
    untransl = int(groups['untransl'])
    total = transl + fuzzy + untransl
    if total:
        percentages = tuple([count * 100 // total for count in (transl, fuzzy, untransl)])
    else:
        # Nothing was counted at all: report zeros rather than dividing by zero.
        percentages = (0, 0, 0)
    print("%s: translated: %d%%, fuzzy: %d%%, untranslated: %d%%" % ((lang,) + percentages))


def calculate_stats():
    """Print translation coverage statistics for one or all languages.

    Must be run from a directory containing ``conf/locale`` or ``locale``.
    Command line options (read from ``sys.argv``):

    * ``-l <language>`` -- report on a single language
    * ``-a`` -- report on every entry of the locale directory whose name does
      not start with a dot
    * ``-d <domain>`` -- ``django`` (default) or ``djangojs``
    * ``-v`` -- print every file as it is processed

    A fresh POT file is built in a temporary directory, which is removed
    again however the function ends.

    Exits with status 1 on usage errors or when no locale directory is found,
    with status 8 when one of the gettext tools writes to stderr, and with
    status 0 when there are no languages or no messages to report on.
    """
    if os.path.isdir(os.path.join('conf', 'locale')):
        localedir = os.path.join('conf', 'locale')
    elif os.path.isdir('locale'):
        localedir = 'locale'
    else:
        print("This script should be run from the django svn tree or your project or app tree.")
        print("If you did indeed run it from the svn checkout or your project or application,")
        print("maybe you are just missing the conf/locale (in the django tree) or locale (for project")
        print("and application) directory?.")
        sys.exit(1)

    try:
        (opts, _unused_args) = getopt.getopt(sys.argv[1:], 'l:d:va')
    except getopt.GetoptError:
        # Unknown option or missing argument: show usage instead of a traceback.
        sys.stderr.write('%s\n' % sys.exc_info()[1])
        _print_usage()
        sys.exit(1)

    lang = None
    domain = 'django'
    verbose = False
    process_all = False

    for opt, value in opts:
        if opt == '-l':
            lang = value
        elif opt == '-d':
            domain = value
        elif opt == '-v':
            verbose = True
        elif opt == '-a':
            process_all = True

    if domain not in ('django', 'djangojs'):
        print("currently l10n-stats.py only supports domains 'django' and 'djangojs'")
        sys.exit(1)
    if lang is None and not process_all:
        _print_usage()
        sys.exit(1)

    # An explicit -l takes precedence over -a.
    if lang is not None:
        languages = [lang]
    else:
        languages = [el for el in os.listdir(localedir) if not el.startswith('.')]

    if not languages:
        sys.exit(0)

    workdir = mkdtemp()
    try:
        potfile = os.path.join(workdir, '%s.pot' % domain)
        _build_pot(workdir, potfile, domain, verbose)

        if not os.path.exists(potfile):
            # No translatable message was found anywhere in the tree.
            sys.exit(0)

        pot_msgs, errors = _run_command('msguniq --to-code=utf-8 "%s"' % potfile, 'b')
        if errors:
            _abort('errors happened while running msguniq\n', errors)
        _write_file(potfile, pot_msgs, 'w')

        for lang in languages:
            _report_language(lang, localedir, workdir, potfile, pot_msgs, domain)
    finally:
        # Runs on normal completion and on every sys.exit() above.
        rmtree(workdir, True)

# Run the statistics gatherer only when executed as a script, not on import.
if __name__ == "__main__":
    calculate_stats()

More like this

  1. typygmentdown by ubernostrum 7 years, 9 months ago
  2. Modeli18n by pavl 4 years, 11 months ago
  3. Mobilize your Django site by stevena0 6 years, 1 month ago
  4. locale based on domain by zeeg 7 years, 11 months ago
  5. LocaleMiddleware without browser language discovery by ivellios 2 months, 4 weeks ago

Comments

Please login first before commenting.