PHP Classes

File: utils.php

Recommend this page to a friend!
  Classes of Artur Barseghyan   PHP TLD Domain Name   utils.php   Download  
File: utils.php
Role: Class source
Content type: text/plain
Description: Main file
Class: PHP TLD Domain Name
Determine the Top Level Domain from a given URL
Author: By Artur Barseghyan
Last change: Adding options
Date: 11 years ago
Size: 6,796 bytes
 

Contents

Class file image Download
<?php
/**
 * @package Tld
 * @author Artur Barseghyan (artur.barseghyan@gmail.com)
 * @version 0.1
 * @license MPL 1.1/GPL 2.0/LGPL 2.1
 * @link http://bitbucket.org/barseghyanartur/php-tld
 *
 * Extracts the top level domain from a given URL. The list of TLD names is taken from
 * http://mxr.mozilla.org/mozilla/source/netwerk/dns/src/effective_tld_names.dat?raw=1
 */
require 'exceptions.php';

/**
 * Main Tld class. Resolves the domain (registered name plus its TLD) of a URL by matching the URL's host
 * against a list of TLD names loaded from a local file, which is downloaded on demand when missing.
 *
 * @example
 * require 'utils.php';
 * echo Tld::getTld('http://www.google.co.uk'); // good pattern, echoes 'google.co.uk'
 * echo Tld::getTld('http://www.me.congresodelalengua3.ar'); // good pattern, echoes 'me.congresodelalengua3.ar'
 * echo Tld::getTld('http://www.v2.google.co.uk'); // good pattern, echoes 'google.co.uk'
 * echo Tld::getTld('/index.php?a=1&b=2'); // bad pattern (no host), raises <TldBadUrl> exception
 * echo Tld::getTld('v2.www.google.com'); // bad pattern (no scheme, so no host), raises <TldBadUrl> exception
 * echo Tld::getTld('http://www.tld.doesnotexist'); // bad pattern, raises <TldDomainNotFound> exception
 */
class Tld {
    /**
     * Container for TLD names. Filled lazily by init(); one entry per non-comment, non-empty line of the
     * local TLD names file.
     * @var <array>
     */
    private static $tldNames = array();

    /**
     * URL to read the original source of TLD names from.
     * NOTE(review): confirm this URL is still served; the class cannot bootstrap without it or a local copy.
     */
    const NAMES_SOURCE_URL = 'http://mxr.mozilla.org/mozilla/source/netwerk/dns/src/effective_tld_names.dat?raw=1';

    /**
     * Local path to the TLD names file, relative to the directory of this file.
     */
    const NAMES_LOCAL_PATH = 'res/effective_tld_names.dat.txt';

    /**
     * Initializes $tldNames array if empty. Throws a <TldIOError> exception in case of read/write errors or returns
     * boolean false if <$failSilently> has been set to true.
     *
     * If the local file cannot be opened, the file is downloaded via updateTldNames() and init() is retried.
     *
     * @static
     * @param <int> $retryCount Number of retries already made; callers normally leave this at 0.
     * @param <bool> $failSilently
     * @return <array> The list of TLD names (boolean false on failure when <$failSilently> is true).
     * @throws <TldIOError>
     */
    public static function init($retryCount = 0, $failSilently = false) {
        // If number of retries exceeds 1, we give up to avoid infinite loops
        if ($retryCount > 1) {
            if ($failSilently)
                return false;
            throw new TldIOError();
        }

        // If $tldNames is not empty, we return its value
        if (count(self::$tldNames) > 0)
            return self::$tldNames;

        // Try to read the file. The warning is suppressed on purpose: a failure is handled right below.
        $localFile = @fopen(dirname(__FILE__) . DIRECTORY_SEPARATOR . self::NAMES_LOCAL_PATH, 'r');

        // If file not opened, obtain the TLD names file from Mozilla's website and try again (recursion).
        // $failSilently is forwarded so that a silent caller never sees an exception from the download step.
        if (false === $localFile) {
            if (false === self::updateTldNames($failSilently))
                return false;
            return self::init($retryCount + 1, $failSilently);
        }

        // Read file line by line, skipping blank lines and comment lines (those starting with '/').
        while (false !== ($line = fgets($localFile))) {
            $line = trim($line);
            if ('' !== $line && '/' !== $line[0]) {
                self::$tldNames[] = $line;
            }
        }
        fclose($localFile);

        return self::$tldNames;
    }

    /**
     * Extracts the TLD from the URL given. Returns a string. May throw <TldBadUrl> or <TldDomainNotFound>
     * exceptions if there's bad URL provided or no TLD match found respectively. In case if <$failSilently> has been
     * set to true, returns boolean false on failure (including failure to load the TLD names), instead of
     * raising an exception.
     *
     * Matching is case-insensitive; the returned string keeps the letter case of the URL's host. If the host
     * itself is a listed TLD name (nothing is registered under it), the whole host is returned.
     *
     * @static
     * @param <str> $url
     * @param <bool> $activeOny: If set to true, only active TLDs are matched against.
     * @param <bool> $failSilently
     * @return <str>
     * @throws <TldBadUrl> or <TldDomainNotFound>
     */
    public static function getTld($url, $activeOny = false, $failSilently = false) {
        if (false === self::init(0, $failSilently))
            return false;

        // Parsing URL. parse_url() returns false for seriously malformed URLs, so the result is checked
        // for being an array before the host key is looked up.
        $parsedUrl = parse_url($url);
        if (is_array($parsedUrl) && !empty($parsedUrl['host']))
            $domainName = $parsedUrl['host'];
        else {
            if ($failSilently)
                return false;
            throw new TldBadUrl($url); // Bad URL
        }

        // Splitting the parts by '.'. The original-case parts build the return value, the lower-cased
        // parts are what gets compared against the (lower-case) TLD names.
        $domainParts = explode('.', $domainName);
        $lookupParts = explode('.', strtolower($domainName));
        $partCount = count($domainParts);

        // Looping from much to less (for example if we have a domain named "v3.api.google.co.uk" we'll try
        // "v3.api.google.co.uk", then "api.google.co.uk", then "google.co.uk", then "co.uk" and finally "uk").
        // If the last one does not match any TLDs, we throw a <TldDomainNotFound> exception.
        for ($i = 0; $i < $partCount; $i++) {
            $match = implode('.', array_slice($lookupParts, $i)); // Exact match
            $wildcardMatch = '*.' . implode('.', array_slice($lookupParts, $i + 1)); // Wildcard match

            $found = in_array($match, self::$tldNames, true) || in_array($wildcardMatch, self::$tldNames, true);

            if (!$found && !$activeOny) {
                // Entries prefixed with '!' are treated here as no longer active domains that still may occur.
                // NOTE(review): in the Public Suffix List format '!' marks an exception to a wildcard rule;
                // confirm whether this handling is intended.
                $found = in_array('!' . $match, self::$tldNames, true);
            }

            if ($found) {
                // The result is the matched TLD plus one label to its left. When the match is at position 0
                // there is no such label; clamping keeps array_slice() from wrapping around with offset -1.
                return implode('.', array_slice($domainParts, max($i - 1, 0)));
            }
        }

        if ($failSilently)
            return false;
        throw new TldDomainNotFound($domainName);
    }

    /**
     * Updates the local TLD names file. Throws a <TldIOError> exception in case of read/write errors or returns
     * boolean false if <$failSilently> has been set to true.
     *
     * The local file is only written when the download produced non-empty content, so a failed download
     * never replaces an existing local copy with an empty file.
     *
     * @static
     * @param <bool> $failSilently
     * @return <bool>
     * @throws <TldIOError>
     */
    public static function updateTldNames($failSilently = false) {
        $res = false;
        try {
            $contents = file_get_contents(self::NAMES_SOURCE_URL);
            if (false !== $contents && '' !== $contents) {
                $res = file_put_contents(
                    dirname(__FILE__) . DIRECTORY_SEPARATOR . self::NAMES_LOCAL_PATH,
                    $contents
                );
            }
        } catch (Exception $e) {
            // Only reached if an error handler turns I/O warnings into exceptions.
            if ($failSilently)
                return false;
            throw new TldIOError();
        }

        // Covers both a failed download/write (false) and zero bytes written.
        if (false == $res) {
            if ($failSilently)
                return false;
            throw new TldIOError();
        }
        return true;
    }

    /**
     * Returns the array of TLD names as currently loaded (empty until init() has run successfully).
     *
     * @static
     * @return <array>
     */
    public static function getTldNames() {
        return self::$tldNames;
    }
}