Login

MaxMind(R) GeoIP Lite CSV Import

Author:
jbronn
Posted:
July 18, 2007
Language:
Python
Version:
.96
Score:
2 (after 2 ratings)

Use this script to import the MaxMind GeoIP Lite CSV datasets into your database. The import takes at least 200MB of RAM, and the resulting database will be ~400MB. Place the script in the same directory as the models. Make sure to set DEBUG=False to prevent running out of memory during the import.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# Copyright (c) 2007, Justin Bronn
# All rights reserved.
#
# Released under New BSD License
#
"""
  These scripts are used to import the MaxMind(R) GeoIP Lite CSV files.

  In order to save memory during import, ensure DEBUG=False in your settings
  (with DEBUG=True, Django keeps a record of every query it executes).
"""
from models import Country, CountryBlock, Location, LocationBlock
from django.contrib.gis.geos import Point
from csv import reader
import sys

def country_import(csv_file):

    fh = open(csv_file)
    table = reader(fh)

    header = table.next()

    for startip, endip, ipfrom, ipto, country, country_name in table:
        cntry, created = Country.objects.get_or_create(name=country_name, code=country)
        if created: print 'Created: %s' % cntry
        block = CountryBlock(ipto=ipto, ipfrom=ipfrom, startip=startip, endip=endip, country=cntry)
        block.save()

    fh.close()
    del table

def location_import(loc_csv, block_csv):

    if loc_csv:
        # First, importing from the Location CSV file.
        fh = open(loc_csv)
        table = reader(fh)
        header = table.next()

        # Caching the countries table in memory
        countries = dict((m.code, m) for m in Country.objects.all())

        i = 0
        for locid, cntry, reg, cty, postal, lat, lon, dma, area in table:
            pnt = Point(float(lon), float(lat))

            # The points for the countries are in the first 244 entries --
            #  pulling these out and updating the points
            if int(locid) < 244:
                try:
                    country = Country.objects.get(code=cntry)
                    country.point = pnt
                except:
                    country = Country(code=cntry, point=pnt)
                country.save()
                countries[cntry] = country # updating the country dictionary
            else:
                country = countries[cntry]

            # region and city
            region = reg.decode('UTF-8', 'ignore')
            city = cty.decode('UTF-8', 'ignore')

            # Constructing the Location
            loc = Location(locid=locid, country=country, region=region, city=city,
                           postalcode=postal, point=pnt, dmacode=dma, areacode=area)
            loc.save()
            i += 1
            if i % 10000 == 0: print 'Saved %d Locations so far ...' % i
        fh.close()
        del table
        del countries

    if block_csv:
        # Second, importing from the Location IP block CSV file
        fh = open(block_csv)
        table = reader(fh)
        header = table.next()

        # This will take a little bit... and ~200+MB of RAM
        print 'Caching Location table...',
        sys.stdout.flush()
        locations = dict((m.locid, m) for m in Location.objects.all())
        print 'DONE.'

        i = 0
        for ipfrom, ipto, locid in table:
            loc = locations[int(locid)] # pulling location from our cached table (less expensive than Location.objects.get())
            loc_block = LocationBlock(location=loc, ipfrom=ipfrom, ipto=ipto)
            loc_block.save()
            i += 1
            if i % 10000 == 0: print 'Saved %d Location Blocks so far ...' % i

        fh.close()
        del table
        del locations

More like this

  1. Template tag - list punctuation for a list of items by shapiromatron 10 months, 1 week ago
  2. JSONRequestMiddleware adds a .json() method to your HttpRequests by cdcarter 10 months, 2 weeks ago
  3. Serializer factory with Django Rest Framework by julio 1 year, 5 months ago
  4. Image compression before saving the new model / work with JPG, PNG by Schleidens 1 year, 6 months ago
  5. Help text hyperlinks by sa2812 1 year, 6 months ago

Comments

leland (on February 22, 2008):

If you are using Python < 2.5, csv.reader() will throw an error if any of the cells contain \r characters. To get around this, change this line:

fh = open(csv_file)

to this:

fh = open(csv_file, 'rUb')

#

leland (on February 22, 2008):

the backslash before the r didn't show up in my previous comment, but it's a return - \r

#

crapufish (on January 28, 2010):

The latest versions of the CSV files include a copyright statement on the first line.

The script above should be changed as follows:

Replace all occurrences of:

table = reader(fh)
header = table.next()

with:

table = reader(fh)
table.next() # Skip the copyright line.
header = table.next()

Enjoy!

#

Please login first before commenting.