Login   Register  
PHP Classes
elePHPant
Icontem

File: processCities.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Aziz S. Hussain  >  Process MaxMind Countries, Regions and Cities Files  >  processCities.php  >  Download  
File: processCities.php
Role: Class source
Content type: text/plain
Description: Main class file
Class: Process MaxMind Countries, Regions and Cities Files
Extract MaxMind geographic data into a database
Author: By
Last change: Replaced tabs with spaces
Date: 2012-10-14 20:45
Size: 13,350 bytes
 

Contents

Class file image Download
<?php
/**
 * processCities.php
 *
 * PHP version >= 4.30
 *
 * @author    Aziz Hussain <azizsaleh@gmail.com>
 * @copyright GPL license
 * @license   http://www.gnu.org/copyleft/gpl.html
 * @link      http://www.azizsaleh.com
 */

/**
 * ProcessCities
 *
 * Download and process MaxMind geo, region and city list and relationships.
 *
 * Usage Options:
 * f -> Directory to download/extract files to (DEFAULT: Current working directory)
 * r -> if set, it will force to download the files again as opposed to using existing files by default
 * o -> what type of db to use, available options 'lite' (sqlite) or 'mysql'
 *
 * d -> Database file name for sqlite or db name for MySQL
 *
 * If using mysql, the following options can also be used:
 * h -> MySQL hostname (DEFAULT: localhost)
 * u -> MySQL username (DEFAULT: root)
 * p -> MySQL password (DEFAULT: empty)
 *
 * Example usage:
 *
 * SqLite:
 * php processCities.php -f /tmp -d /tmp/geodbLite.sqlite
 * or the following (DEFAULT: will use maxmindDb.sqlite on current working directory)
 * php processCities.php -f /tmp
 *
 * MySQL:
 * php processCities.php -f /tmp -o mysql -h 192.168.1.1 -u root -p rootpass -d geo_db
 *
 *
 * @author    Aziz Hussain <azizsaleh@gmail.com>
 * @copyright GPL license
 * @license   http://www.gnu.org/copyleft/gpl.html
 * @link      http://www.azizsaleh.com
 */
class processCities
{
    /**
     * Maxmind resource links
     *
     * @var array
     */
    public $links = array (
        'countries'    => 'http://www.maxmind.com/app/iso3166',
        'regions'    => 'http://dev.maxmind.com/static/maxmind-region-codes.csv',
        'cities'    => 'http://www.maxmind.com/download/worldcities/worldcitiespop.txt.gz',
    );

    /**
     * User options
     *
     * @var array
     */
    public $options;

    /**
     * Base directory, where files are downloaded to
     *
     * @var string
     */
     public $base;

    /**
     * Constructor - check user options, download any files needed
     *
     * @return void
     */
    public function __construct()
    {
        // Custom temp directory
        $this->options = getopt('f:r:h:u:p:d:o:');

        if (!empty($this->options['f'])) {
            $this->base = $this->options['f'];
        } else {
            $this->base = __DIR__;
        }

        if (!is_writable($this->base)) {
            die('Unable to access temp directory: ' . $this->base);
        }

        $this->base = rtrim($this->base, '/') . '/';

        // Fresh copies
        if (!empty($this->options['r'])) {
            foreach (array('countries.txt', 'regions.txt', 'cities.txt') as $file) {
                if (file_exists($this->base . $file)) {
                    if (!is_writable($this->base . $file)) {
                        die('Unable to remove file: ' . $this->base . $file);
                    }

                    unlink ($this->base . $file);
                }
            }
        }

        // Download countries?
        if (!file_exists($this->base . 'countries.txt')) {
            $countries = file_get_contents($this->links['countries']);
            $res = preg_split('/<\/?pre>/', $countries);
            if (empty($res[1])) {
                die('It seems like Maxmind has changed countries list location');
            }
            $countries = trim($res[1]);
            file_put_contents($this->base . 'countries.txt', $countries);
            unset($res, $countries);
        }

        // Download regions?
        if (!file_exists($this->base . 'regions.txt')) {
            $regions = file_get_contents($this->links['regions']);
            file_put_contents($this->base . 'regions.txt', $regions);
            unset($regions);
        }

        // Download/Extract cities?
        if (!file_exists($this->base . 'cities.txt')) {
            // Download file
            exec("wget -O '{$this->base}cities.txt.gz' '{$this->links['cities']}'");

            // Extract file
            exec("gunzip -f {$this->base}cities.txt.gz");
        }
    }

    /**
     * Process
     *
     * @return void
     */
    public function process()
    {
        try {
            // Do via MySQL
            if (isset($this->options['o']) && strtolower($this->options['o']) == 'mysql') {
                $this->processToMySQL();
                return;
            }

            // Do sqllite by default
            $this->processToLite();

        } catch (Exception $error) {
            die($error->getMessage());
        }
    }

    /**
     * Start processing downloaded files to SQL Db
     *
     * @return void
     */
    public function processToLite()
    {
        // Connect to sqlite DB
        $name = 'maxmindDb.sqlite';
        if (!empty($this->options['d'])) {
            $name = $this->options['d'];
        }

        $db = new PDO('sqlite:' . $name);

        // Drop/Create Tables
        $queries = array (
            'DROP TABLE IF EXISTS countries',
            'DROP TABLE IF EXISTS regions',
            'DROP TABLE IF EXISTS cities',
            'CREATE TABLE countries (
                country_id int,
                country_code varchar(4),
                country_name varchar(255),
                PRIMARY KEY(country_id ASC)
            )',
            'CREATE TABLE regions (
                region_id varchar(2),
                country_id int,
                region_name varchar(255)
            )',
            'CREATE TABLE cities (
                city_id int,
                region_id varchar(2),
                country_id int,
                city_name varchar(255),
                ascii_name varchar(255)
            )',
            'CREATE INDEX region_id ON cities (region_id) ASC',
        );

        // Execute drop/create queries
        foreach ($queries as $query) {
            $db->exec($query);
        }

        $this->addRecords($db);
    }

    /**
     * Start processing downloaded files to MySQL Db
     *
     * @return void
     */
    public function processToMySQL()
    {
        // Default Db Settings
        $settings = array (
            'host'  => 'localhost',
            'user'  => 'root',
            'pass'  => '',
            'db'    => 'maxmind',
        );

        // Check user settings
        foreach (array(
            'h' => 'host',
            'p' => 'pass',
            'd' => 'db',
            'u' => 'user',
        ) as $input => $toVar) {
            if (!empty($this->options[$input])) {
                $settings[$toVar] = $this->options[$input];
            }
        }

        // Connect to Db
        $db = new PDO("mysql:dbname={$settings['db']};host={$settings['host']}", $settings['user'], $settings['pass']);

        // Drop/Create Tables
        $queries = array (
            'DROP TABLE IF EXISTS countries',
            'DROP TABLE IF EXISTS regions',
            'DROP TABLE IF EXISTS cities',
            'CREATE TABLE `countries` (
                `country_id` INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY ,
                `country_code` VARCHAR( 4 ) NOT NULL ,
                `country_name` VARCHAR( 255 ) NOT NULL
            )',
            'CREATE TABLE `regions` (
                `region_id` VARCHAR (2) NOT NULL ,
                `country_id` INT UNSIGNED NOT NULL ,
                `region_name` VARCHAR( 255 ) NOT NULL ,
                PRIMARY KEY ( `region_id`, `country_id` )
            )',
            'CREATE TABLE `cities` (
                `city_id` INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY ,
                `region_id` VARCHAR(2) NOT NULL ,
                `country_id` INT UNSIGNED NOT NULL ,
                `city_name` VARCHAR( 255 ) NOT NULL ,
                `ascii_name` VARCHAR( 255 ) NOT NULL,
                 INDEX ( `region_id` )
            )',
        );

        // Execute drop/create queries
        foreach ($queries as $query) {
            $db->exec($query);
        }

        $this->addRecords($db, 'mysql');
    }

    /**
     * Add records to Db
     *
     * @param   resource                $db     Db Handler
     * @param   enum ('lite', 'mysql')  $type   Db Type
     *
     * @return  void
     */
    public function addRecords($db, $type = 'lite')
    {
        // Country code to Id mapping
        $countryMapping = array();

        // Insert countries
        echo 'Starting countries' . PHP_EOL;
        $countries = file_get_contents($this->base . 'countries.txt');
        $countries = explode(PHP_EOL, $countries);

        // Foreach country
        foreach ($countries as $country) {
            // Check for empty lines
            $country = trim($country);
            if (empty($country)) {
                continue;
            }

            // Construct SQL
            list($countryCode, $countryName) = explode(',', $country);
            $countryName = $this->escape(trim($countryName, '"'));
            $sql = "INSERT INTO countries (country_code, country_name) VALUES ('$countryCode', '$countryName')";

            // Add to Db
            $db->exec($sql);
            $lastId = $db->lastInsertId();
            $countryMapping[$countryCode] = $lastId;
        }
        echo 'Finished countries' . PHP_EOL;

        // Insert regions
        echo 'Starting regions' . PHP_EOL;
        $regions = file_get_contents($this->base . 'regions.txt');
        $regions = explode(PHP_EOL, $regions);

        // Foreach region
        foreach ($regions as $region) {

            // Check for empty lines
            $region = trim($region);
            if (empty($region)) {
                continue;
            }

            // Construct SQL
            list($countryCode, $regionId, $regionName) = explode(',', $region);
            $regionName = $this->escape(trim($regionName, '"'));

            // Get country Id based on mapping
            $countryCode = $countryMapping[$countryCode];

            $sql = "INSERT INTO regions (region_id, country_id, region_name) VALUES
            ('$regionId', '$countryCode', '$regionName')";

            // Add to Db
            $db->exec($sql);
        }
        echo 'Finished regions' . PHP_EOL;

        // Insert cities (takes longest)
        echo 'Starting cities' . PHP_EOL;
        $file = fopen($this->base . 'cities.txt', 'r');

        // Skip first line
        fgets($file, 4096);

        // Keep track of completed Ids
        $doneCities     = array();
        $lastCountry    = false;
        $skipped        = array();

        // Foreach city
        while (($line = fgets($file, 4096)) !== false) {

            // Construct SQL
            list($countryCode, $asciiName, $cityName, $regionCode, $pop, $lat, $long) = explode(',', $line);

            $asciiName      = $this->escape($asciiName);
            $cityName       = $this->escape($cityName);
            $regionCode     = $this->escape($regionCode);
            $countryCode    = strtoupper($countryCode);

            // Get country Id based on mapping
            if (!isset($countryMapping[strtoupper($countryCode)])) {
                if (!isset($skipped[$countryCode])) {
                    echo 'Skipping cities in country: ' . $countryCode . '. Not a valid country code' . PHP_EOL;
                    $skipped[$countryCode] = true;
                }
                continue;
            }
            $countryCode = $countryMapping[strtoupper($countryCode)];

            // Let's reset done cities so it does not grow out of proportion
            if ($lastCountry == false || $lastCountry != $countryCode){
                $doneCities = array();
                $lastCountry = $countryCode;
            }

            $sql = "INSERT INTO cities (country_id, ascii_name, city_name, region_id) VALUES
            ('$countryCode', '$asciiName', '$cityName', '$regionCode')";

            /*
             * If latitude/longitude already processed, do not add it again
             * The issue with maxmind that it stores multiple variation of the same location
             */
            $key = md5($lat . $long);
            if (isset($doneCities[$key])) {
                continue;
            }
            $doneCities[$key] = true;

            // Add to Db
            $db->exec($sql);
        }
        fclose($file);
        echo 'Finished cities' . PHP_EOL;

        // Close connections
        if (stripos(get_resource_type($db), 'mysql') !== false) {
            mysql_close($db);
            return;
        }

        // Close Db connection
        $db = null;
        return;
    }

    /**
     * Escape Db Argument
     *
     * @param   string  $argument
     *
     * @return  string
     */
    public function escape($argument)
    {
        $searches = array('\\', "\0", "\n", "\r", "'", '"', "\x1a");
        $replaces = array('\\\\', '\\0', '\\n', '\\r', "\\'", '\\"', '\\Z');
        return str_replace($searches, $replaces, $argument);
    }
}

$process = new processCities();
$process->process();