PHP Classes

File: engine/handler.robots.php

Recommend this page to a friend!
  Classes of Kristo Vaher   Wave Framework   engine/handler.robots.php   Download  
File: engine/handler.robots.php
Role: Application script
Content type: text/plain
Description: Robots Handler
Class: Wave Framework
MVC framework for building Web sites and APIs
Author: By Kristo Vaher
Last change: Update of engine/handler.robots.php
Date: 8 months ago
Size: 6,969 bytes
 

Contents

Class file image Download
<?php
/**
 * Wave Framework <http://github.com/kristovaher/Wave-Framework>
 * Robots Handler
 *
 * Robots Handler is used to return the robots.txt file when a request is made for it. This
 * handler either returns the existing /robots.txt file or generates a new one that allows
 * robots access to everything. Robot directives for search engines and other crawlers are
 * stored in the files and pages themselves, so nothing needs to be specifically allowed or
 * denied through the robots.txt file. Robots Handler also points to the sitemap.xml file.
 *
 * @package Index Gateway
 * @author Kristo Vaher <kristo@waher.net>
 * @copyright Copyright (c) 2012, Kristo Vaher
 * @license GNU Lesser General Public License Version 3
 * @tutorial /doc/pages/handler_robots.htm
 * @since 1.5.0
 * @version 3.5.0
 */
// INITIALIZATION

// This handler must not be requested directly. The check relies on $resourceAddress
// being set by the including script (presumably Index Gateway); when it is missing,
// the request is rejected with 403 and execution stops.
if(isset($resourceAddress)===false){
    header('HTTP/1.1 403 Forbidden');
    exit();
}
// Robots.txt file is always returned in plain text format
header('Content-Type: text/plain;charset=utf-8;');

// This flag stores whether cache was used; it is reported to the logger at the end of the script
$cacheUsed=false;

// Default cache timeout of four hours (14400 seconds), unless a timeout is already set in configuration.
// The same value is used both for expiring the generated cache file and for the max-age/Expires headers.
if(!isset($config['robots-cache-timeout'])){
    $config['robots-cache-timeout']=14400;
}
// GENERATING ROBOTS FILE

// Robots file is generated only if a static robots.txt does not exist in root
if(!file_exists(__ROOT__.'robots.txt')){

    // ASSIGNING PARAMETERS FROM REQUEST

        // The requested filename may carry '&'-separated flags, these are split into a list
        $parameters=array_unique(explode('&',$resourceFile));

        // The 'nocache' flag forces regeneration and tells the user agent not to cache the response
        $noCacheRequested=in_array('nocache',$parameters,true);

    // LOOKING FOR CACHE

        // Cache filename depends on system and API versions as well as the request itself
        $cacheFilename=md5('robots.txt&'.$config['version-system'].'&'.$config['version-api'].'&'.$resourceRequest).'.tmp';
        // Cache files are spread into subdirectories named after the first two characters of the filename
        $cacheDirectory=__ROOT__.'filesystem'.DIRECTORY_SEPARATOR.'cache'.DIRECTORY_SEPARATOR.'resources'.DIRECTORY_SEPARATOR.substr($cacheFilename,0,2).DIRECTORY_SEPARATOR;
        // Full path of the cache file
        $cachePath=$cacheDirectory.$cacheFilename;

        // If cache file exists then its modification time is used, otherwise it is the
        // server request time (which also acts as the 'no cache found' marker below)
        if(file_exists($cachePath)){
            $lastModified=filemtime($cachePath);
        } else {
            $lastModified=$_SERVER['REQUEST_TIME'];
        }

    // GENERATING NEW ROBOTS FILE OR LOADING FROM CACHE

        // Robots file is generated if caching is turned off for this request, if no cache
        // was found or if the cached file is older than the configured timeout
        if($noCacheRequested || ($lastModified==$_SERVER['REQUEST_TIME'] || $lastModified<($_SERVER['REQUEST_TIME']-$config['robots-cache-timeout']))){

            // STATE AND DATABASE

                // State provides the 'url-web' setting that is used for the Sitemap address below
                require(__ROOT__.'engine'.DIRECTORY_SEPARATOR.'class.www-state.php');
                $state=new WWW_State($config);

                // Connecting to database, if configuration is set
                // Uncomment this if you actually need to use database connection for robots.txt file
                // if(isset($config['database-name']) && $config['database-name']!='' && isset($config['database-type']) && isset($config['database-host']) && isset($config['database-username']) && isset($config['database-password'])){
                    // require(__ROOT__.'engine'.DIRECTORY_SEPARATOR.'class.www-database.php');
                    // $databaseConnection=new WWW_Database($config['database-type'],$config['database-host'],$config['database-name'],$config['database-username'],$config['database-password'],((isset($config['database-errors']))?$config['database-errors']:false),((isset($config['database-persistent']))?$config['database-persistent']:false));
                // }

            // GENERATING ROBOTS STRING

                // All robots are allowed everywhere (empty Disallow)
                $robots='';
                $robots.='User-agent: *'."\n";
                $robots.='Disallow: '."\n";
                // NOTE(review): $_SERVER['HTTP_HOST'] is supplied by the client and ends up in the
                // cached file; confirm that the host is validated upstream or use a configured hostname
                $robots.='Sitemap: '.((isset($config['limiter-https']) && $config['limiter-https']==true)?'https://':'http://').$_SERVER['HTTP_HOST'].$state->data['url-web'].'sitemap.xml';

            // WRITING TO CACHE

                // Cache subdirectory is created if it does not exist yet. The second is_dir() check
                // keeps a concurrent request that created the same directory from causing an error.
                if(!is_dir($cacheDirectory) && !mkdir($cacheDirectory,0755) && !is_dir($cacheDirectory)){
                    trigger_error('Cannot create cache folder',E_USER_ERROR);
                }

                // Data is written to cache file, only an actual write failure (false) is an error
                if(file_put_contents($cachePath,$robots)===false){
                    trigger_error('Cannot create resource cache',E_USER_ERROR);
                }

                // Cache file was just (re)written, so the previous modification time no longer applies
                $lastModified=$_SERVER['REQUEST_TIME'];

        } else {

            // Setting the flag for logger
            $cacheUsed=true;

        }

    // HEADERS

        if($noCacheRequested){
            // User agent is told not to cache or store this response, it expires immediately
            header('Cache-Control: no-cache,no-store');
            header('Expires: '.gmdate('D, d M Y H:i:s',$_SERVER['REQUEST_TIME']).' GMT');
        } else {
            // User agent is told to cache these results for the configured duration
            header('Cache-Control: public,max-age='.$config['robots-cache-timeout']);
            header('Expires: '.gmdate('D, d M Y H:i:s',($_SERVER['REQUEST_TIME']+$config['robots-cache-timeout'])).' GMT');
        }

        // Last modified header is sent in both cases
        header('Last-Modified: '.gmdate('D, d M Y H:i:s',$lastModified).' GMT');

        // Content length is taken from the cache file, letting user agent determine the response size
        $contentLength=filesize($cachePath);
        header('Content-Length: '.$contentLength);

    // OUTPUT

        // Returning the file to user agent
        readfile($cachePath);

        // Cache file is deleted if cache was requested to be off
        if($noCacheRequested){
            unlink($cachePath);
        }

} else {

    // RETURNING EXISTING ROBOTS FILE

        // This is technically considered as using cache
        $cacheUsed=true;

        // Cache headers
        header('Cache-Control: public,max-age='.$config['robots-cache-timeout']);
        header('Expires: '.gmdate('D, d M Y H:i:s',($_SERVER['REQUEST_TIME']+$config['robots-cache-timeout'])).' GMT');

        // Last modified header is the modification time of the static file
        header('Last-Modified: '.gmdate('D, d M Y H:i:s',filemtime(__ROOT__.'robots.txt')).' GMT');

        // Content length is defined, letting user agent determine the response size
        $contentLength=filesize(__ROOT__.'robots.txt');
        header('Content-Length: '.$contentLength);

        // Since robots.txt did exist in root, it is simply returned
        readfile(__ROOT__.'robots.txt');

}
   
// WRITING TO LOG

// When a Logger object is available, this request is recorded so that it can be
// used for performance review later
if(isset($logger)){

    // Query counter is read from the database connection if one was created, otherwise zero is logged
    if(isset($databaseConnection)){
        $robotsQueryCount=$databaseConnection->queryCounter;
    } else {
        $robotsQueryCount=0;
    }

    // Collecting custom log data for this request
    $robotsLogData=array();
    $robotsLogData['category']='robots';
    $robotsLogData['cache-used']=$cacheUsed;
    $robotsLogData['content-length-used']=$contentLength;
    $robotsLogData['database-query-count']=$robotsQueryCount;

    // Assigning the data to logger and writing the log entry
    $logger->setCustomLogData($robotsLogData);
    $logger->writeLog();

}
?>