MaxMind(R) GeoIP Lite CSV Import

Author:: jbronn
Posted:: July 18, 2007
Language:: Python
Version:: .96
Score:: 2 (after 2 ratings)

Download
Raw

Use this script to import the MaxMind GeoIP Lite CSV datasets into your database. The import takes at least 200MB of RAM, and the resulting database will be ~400MB. Place the script in the same directory as your models. Make sure to set DEBUG=False to prevent running out of memory during the import.

# Copyright (c) 2007, Justin Bronn
# All rights reserved.
#
# Released under New BSD License
#
"""
  These scripts are used to import the MaxMind(R) GeoIP Lite CSV files.

  In order to save memory during import ensure DEBUG=False in your settings.
"""
from models import Country, CountryBlock, Location, LocationBlock
from django.contrib.gis.geos import Point
from csv import reader
import sys

def country_import(csv_file):

    fh = open(csv_file)
    table = reader(fh)

    header = table.next()

    for startip, endip, ipfrom, ipto, country, country_name in table:
        cntry, created = Country.objects.get_or_create(name=country_name, code=country)
        if created: print 'Created: %s' % cntry
        block = CountryBlock(ipto=ipto, ipfrom=ipfrom, startip=startip, endip=endip, country=cntry)
        block.save()

    fh.close()
    del table

def location_import(loc_csv, block_csv):

    if loc_csv:
        # First, importing from the Location CSV file.
        fh = open(loc_csv)
        table = reader(fh)
        header = table.next()

        # Caching the countries table in memory
        countries = dict((m.code, m) for m in Country.objects.all())

        i = 0
        for locid, cntry, reg, cty, postal, lat, lon, dma, area in table:
            pnt = Point(float(lon), float(lat))

            # The points for the countries are in the first 244 entries --
            #  pulling these out and updating the points
            if int(locid) < 244:
                try:
                    country = Country.objects.get(code=cntry)
                    country.point = pnt
                except:
                    country = Country(code=cntry, point=pnt)
                country.save()
                countries[cntry] = country # updating the country dictionary
            else:
                country = countries[cntry]

            # region and city
            region = reg.decode('UTF-8', 'ignore')
            city = cty.decode('UTF-8', 'ignore')

            # Constructing the Location
            loc = Location(locid=locid, country=country, region=region, city=city,
                           postalcode=postal, point=pnt, dmacode=dma, areacode=area)
            loc.save()
            i += 1
            if i % 10000 == 0: print 'Saved %d Locations so far ...' % i
        fh.close()
        del table
        del countries

    if block_csv:
        # Second, importing from the Location IP block CSV file
        fh = open(block_csv)
        table = reader(fh)
        header = table.next()

        # This will take a little bit... and ~200+MB of RAM
        print 'Caching Location table...',
        sys.stdout.flush()
        locations = dict((m.locid, m) for m in Location.objects.all())
        print 'DONE.'

        i = 0
        for ipfrom, ipto, locid in table:
            loc = locations[int(locid)] # pulling location from our cached table (less expensive than Location.objects.get())
            loc_block = LocationBlock(location=loc, ipfrom=ipfrom, ipto=ipto)
            loc_block.save()
            i += 1
            if i % 10000 == 0: print 'Saved %d Location Blocks so far ...' % i

        fh.close()
        del table
        del locations

Comments

leland (on February 22, 2008):

If you are using Python < 2.5, csv.reader() will throw an error if any of the cells contain \r characters. To get around this, change this line:

fh = open(csv_file)

to this:

fh = open(csv_file, 'rUb')

leland (on February 22, 2008):

the backslash before the r didn't show up in my previous comment, but it's a return - \r

crapufish (on January 28, 2010):

The latest versions of the CSV files include a copyright statement on the first line.

The script above should be changed as follows:

Replace all occurrences of:

table = reader(fh)
header = table.next()

with:

table = reader(fh)
table.next() # Skip the copyright line.
header = table.next()

Enjoy!

Please login first before commenting.

MaxMind(R) GeoIP Lite CSV Import

More like this

Comments