Login

autotranslate po files using google translate

Author:
dnordberg
Posted:
September 11, 2008
Language:
Python
Version:
1.0
Score:
2 (after 2 ratings)

Save as autotranslate.py and run it with: python autotranslate.py pofile inputlang outputlang. For example, python autotranslate.py path_to_blank_fr_lang.po en fr translates to French.

Some known bugs:

  • Doesn't handle some line returns properly
  • Block translations aren't formatted correctly in the translation.

If anyone has any issues or fixes please post to the comments.

Of course, the output shouldn't be used as a substitute for a proper translation.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/python
from translate.storage import po
from django.utils import simplejson
import sys, os, re, urllib
from htmlentitydefs import name2codepoint
 
def htmldecode(text):
    """Decode HTML entities in the given text.

    Handles decimal (``&#65;``), hexadecimal (``&#x41;``) and named
    (``&amp;``) references; the trailing semicolon is optional.  References
    that cannot be decoded -- unknown names, or numeric code points the
    interpreter refuses to convert -- are left in the text unchanged.

    Args:
        text: the string to decode.

    Returns:
        ``text`` with every decodable reference replaced by its character.
    """
    # ``unicode``/``unichr`` are Python 2 builtins; when they are missing
    # fall back to ``str``/``chr`` so the function still works.
    try:
        text_type, to_unichr = unicode, unichr
    except NameError:
        text_type, to_unichr = str, chr

    if isinstance(text, text_type):
        uchr = to_unichr
    else:
        # Non-unicode input: code points up to 255 stay single characters
        # produced by chr(); anything larger needs a unicode character.
        def uchr(value):
            return to_unichr(value) if value > 255 else chr(value)

    def entitydecode(match):
        entity = match.group(1)
        try:
            if entity.startswith('#x'):
                return uchr(int(entity[2:], 16))
            if entity.startswith('#'):
                return uchr(int(entity[1:]))
            if entity in name2codepoint:
                return uchr(name2codepoint[entity])
        except (ValueError, OverflowError):
            # Code point outside the range the conversion accepts
            # (e.g. ``&#x110000;``): keep the reference verbatim instead
            # of aborting the whole substitution.
            pass
        return match.group(0)

    charrefpat = re.compile(r'&(#(\d+|x[\da-fA-F]+)|[\w.:-]+);?')
    return charrefpat.sub(entitydecode, text)

def get_translation(sl, tl, text):
    """Translate ``text`` from language ``sl`` to ``tl`` with the Google AJAX API.

    The service response is JSON in the format
    '{"responseData": {"translatedText":"Ciao mondo"}, "responseDetails": null, "responseStatus": 200}'

    Args:
        sl: source language code, e.g. ``"en"``.
        tl: target language code, e.g. ``"fr"``.
        text: the string to translate.  One pair of surrounding double
            quotes is removed before the text is sent.

    Returns:
        The translated text, or ``""`` when the request fails, the reply
        cannot be parsed, or the reported status is not 200.
    """
    # Only strip the quoting when the text really is wrapped in quotes;
    # otherwise a genuine trailing character would be chopped off.
    if len(text) >= 2 and text.startswith('"') and text.endswith('"'):
        text = text[1:-1]

    # Let urlencode escape every parameter, including the language pair
    # ("|" becomes "%7C"), instead of splicing sl/tl into the URL raw.
    params = {
        'v': '1.0',
        'q': text.encode('utf-8'),
        'langpair': '%s|%s' % (sl, tl),
    }
    url = ('http://ajax.googleapis.com/ajax/services/language/translate?%s'
           % urllib.urlencode(params))

    try:
        response = urllib.urlopen(url)
        try:
            result = simplejson.load(response)
        finally:
            # Release the connection whether or not parsing succeeded.
            response.close()
    except (IOError, ValueError) as e:
        # IOError: the request failed.  ValueError: presumably what the
        # JSON decoder raises on a malformed reply -- TODO confirm for the
        # simplejson version in use.
        print(e)
        return ""

    try:
        status = result['responseStatus']
    except KeyError:
        status = -1

    if status == 200:
        return result['responseData']['translatedText']

    print("Error %s: Translating string %s" % (status, text))
    return ""

def translate_po(file, sl, tl):
    """Fill the untranslated entries of a PO file with machine translations.

    Every non-header unit that has a msgid and whose msgstr equals
    ``[u'""']`` gets a msgstr built by passing each msgid element through
    ``get_translation`` and ``htmldecode``.  Progress is written to stderr.
    The store is saved after every 50 translated units and once more at
    the end.

    Args:
        file: path of the PO file to process.
        sl: source language code handed to ``get_translation``.
        tl: target language code handed to ``get_translation``.
    """
    openfile = po.pofile(open(file))
    nb_elem = len(openfile.units)
    translated = 0
    for position, unit in enumerate(openfile.units):
        # report progress
        cur_elem = position + 1
        s = "\r%f %% - (%d msg processed out of %d) " \
            % (100 * float(cur_elem) / float(nb_elem), cur_elem, nb_elem)
        sys.stderr.write(s)

        # Skip the header, units without a msgid, and units that already
        # carry a translation.
        if unit.isheader() or not unit.msgid or unit.msgstr != [u'""']:
            continue

        # NOTE(review): the translated text is wrapped in double quotes
        # without escaping; a translation containing '"' may produce an
        # invalid entry -- confirm against the PO writer.
        unit.msgstr = ['"%s"' % htmldecode(get_translation(sl, tl, x))
                       for x in unit.msgid]
        translated += 1

        # Checkpoint only right after a translation was added, so the file
        # is not rewritten again for every following untouched unit.
        if translated % 50 == 0:
            print("Saving file...")
            openfile.save()
    openfile.save()

if __name__ == "__main__":
    # Three arguments are required (PO path, source language, target
    # language) and the PO path must exist; otherwise show usage and quit.
    args_ok = len(sys.argv) >= 4 and os.path.exists(sys.argv[1])
    if not args_ok:
        sys.stderr.write("""
usage example: python autotranslate.py <lang.po> en fr
""")
        sys.exit(1)

    in_pofile = os.path.abspath(sys.argv[1])
    from_lang, to_lang = sys.argv[2:4]
    print('Translating %s to %s' % (from_lang, to_lang))
    translate_po(in_pofile, from_lang, to_lang)
    print('Translation done')

More like this

  1. Template tag - list punctuation for a list of items by shapiromatron 11 months, 2 weeks ago
  2. JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 11 months, 3 weeks ago
  3. Serializer factory with Django Rest Framework by julio 1 year, 6 months ago
  4. Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 7 months ago
  5. Help text hyperlinks by sa2812 1 year, 7 months ago

Comments

dk1 (on October 27, 2008):

Great script! Exactly what I was looking for.

There were two typos, on line 78 it should be < 4, not < 1

and on line 80, sys.stderr not sys.strerr

#

Please login first before commenting.