"""
Retrieve a list of information about countries, pulled from GeoNames.
Example entry:
{u'Area(in sq km)': u'33843',
u'Capital': u'Chi\u015fin\u0103u',
u'Continent': u'EU',
u'Country': u'Moldova',
u'CurrencyCode': u'MDL',
u'CurrencyName': u'Leu',
u'EquivalentFipsCode': u'',
u'ISO': u'MD',
u'ISO-Numeric': u'498',
u'ISO3': u'MDA',
u'Languages': u'mo,ro,ru,gag,tr',
u'Phone': u'373',
u'Population': u'4324000',
u'Postal Code Format': u'MD-####',
u'Postal Code Regex': u'^(?:MD)*(\\d{4})$',
u'fips': u'MD',
u'geonameid': u'617790',
u'neighbours': u'RO,UA',
u'tld': u'.md'}
"""
import urllib, codecs
# Location of the GeoNames country info file that
# get_geonames_country_data() downloads and parses as tab-separated text.
COUNTRY_INFO_URL = "http://download.geonames.org/export/dump/countryInfo.txt"
def get_geonames_country_data(url=None):
    """Download the GeoNames country table and return it as a list of dicts.

    Args:
        url: Location of the tab-separated country info file. When omitted,
            the module-level COUNTRY_INFO_URL is used (looked up at call
            time).

    Returns:
        A list with one dictionary per data row, in file order. Keys are the
        column names taken from the header line (the line starting with
        '#ISO', with the leading '#' removed); values are the row's
        tab-separated fields as text. Keys and fields are paired
        positionally, so a row with fewer fields than the header lacks the
        trailing keys and surplus fields are dropped.

    Raises:
        IndexError: if the data contains no header line starting with '#ISO'.
    """
    # urlopen lives in urllib.request on Python 3 and directly in urllib on
    # Python 2; importing locally keeps the function usable on both.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    if url is None:
        url = COUNTRY_INFO_URL

    response = urlopen(url)
    try:
        raw = response.read()
    finally:
        # Release the underlying connection even if the read fails.
        response.close()

    # 'utf-8-sig' decodes UTF-8 and drops a leading BOM when one is present.
    text = raw.decode('utf-8-sig')

    # Strip '\r' so CRLF data does not leak a carriage return into the last
    # column name and the last field of every row, then ignore blank lines.
    stripped = [line.rstrip('\r') for line in text.split('\n')]
    lines = [line for line in stripped if line]

    # The column names live on the comment line that starts with '#ISO'.
    header_lines = [line for line in lines if line.startswith('#ISO')]
    if not header_lines:
        # IndexError is what the bare [0] lookup used to raise; the type is
        # kept for backward compatibility, now with an explanatory message.
        raise IndexError("no header line starting with '#ISO' found in %r"
                         % (url,))
    headers = header_lines[0][1:].split('\t')

    # Every remaining non-comment line describes one country.
    return [
        dict(zip(headers, line.split('\t')))
        for line in lines
        if not line.startswith('#')
    ]
# Comments