PHP Classes

File: bookmarks_checker.php

Recommend this page to a friend!
  Classes of Martin Latter   Bookmarks Checker for Chrome and Firefox   bookmarks_checker.php   Download  
File: bookmarks_checker.php
Role: Example script
Content type: text/plain
Description: Example script
Class: Bookmarks Checker for Chrome and Firefox
Check browser bookmark files to identify dead URLs
Author: By Martin Latter
Last change:
Date: 5 years ago
Size: 2,066 bytes
 

Contents

Class file image Download
#!/usr/bin/env php
<?php

/**
    * Bookmarks Checker
    *
    * Verify links in a Chrome or Firefox exported bookmarks file using cURL multi.
    *
    * Usage: php bookmarks_checker.php [file]
    *
    * @author Martin Latter
    * @copyright Martin Latter 15/01/2019
    * @version 0.08
    * @license GNU GPL version 3.0 (GPL v3); http://www.gnu.org/licenses/gpl.html
    * @link https://github.com/Tinram/Bookmarks-Checker.git
*/


require('classes/url_checker.class.php');

use
Tinram\URLChecker2\URLChecker2;

/* double line break, used to space out console messages */
const DUB_EOL = PHP_EOL . PHP_EOL;

/* bookmarks file tried when no filename argument is supplied */
const DEFAULT_FILE = 'bookmarks.html';

/* log filename; not referenced in this script - presumably read by the URL checker class (confirm) */
const LOG_FILE = 'bookmarks_checker.log';

/* size of each cURL request batch */
const BATCH_SIZE = 200;


/*
    resolve the bookmarks filename:
    1) the first command-line argument, when supplied
    2) otherwise DEFAULT_FILE, when it exists in the working directory
    3) otherwise print the usage text and stop
*/
if (isset($_SERVER['argv'][1]))
{
    $sFilename = $_SERVER['argv'][1];
}
elseif (file_exists(DEFAULT_FILE))
{
    $sFilename = DEFAULT_FILE;
}
else
{
    /* script name with extension, e.g. for the usage line */
    $sScript = basename(__FILE__);

    /* title derived from the script name: underscores become spaces, each word capitalised */
    $sTitle = str_replace('_', ' ', ucwords(basename($sScript, '.php'), '_'));

    die(PHP_EOL . ' ' . $sTitle . DUB_EOL . "\tusage: " . $sScript . ' [filename]' . DUB_EOL);
}

/* no such file: report it and stop with a non-zero exit status so shell callers can detect the failure */
if ( ! file_exists($sFilename))
{
    echo PHP_EOL . ' ' . $sFilename . ' does not exist in this directory!' . DUB_EOL;
    exit(1);
}

/*
    file_exists() is also true for directories, and says nothing about read permission;
    reject anything that cannot be read as a regular file before attempting to load it
*/
if ( ! is_file($sFilename) || ! is_readable($sFilename))
{
    echo PHP_EOL . ' ' . $sFilename . ' is not a readable file!' . DUB_EOL;
    exit(1);
}


/* load the whole bookmarks file into memory */
$sHtml = file_get_contents($sFilename);

/* file_get_contents() returns false on failure (e.g. a directory or an unreadable file) */
if ($sHtml === false)
{
    echo PHP_EOL . ' ' . $sFilename . ' could not be read!' . DUB_EOL;
    exit(1);
}

/*
    capture group 1: href value (double-quoted), capture group 2: anchor text
    flags: s = dot matches newlines, i = case-insensitive, U = ungreedy
*/
$rxPattern = '/<a\s[^>]*href=\"([^\"]*)\"[^>]*>(.*)<\/a>/siU'; /* avoid attributes: by chirp.com.au */

/* PREG_SET_ORDER: one $aMatches entry per anchor, holding [full match, href, text] */
if (preg_match_all($rxPattern, $sHtml, $aMatches, PREG_SET_ORDER) === false)
{
    /* a PCRE failure (such as hitting the backtrack limit) must not be mistaken for 'no links found' */
    echo PHP_EOL . ' ' . $sFilename . ' could not be parsed (PCRE error code ' . preg_last_error() . ')' . DUB_EOL;
    exit(1);
}


/* reshape each regex match into a url/name pair: index 1 is the href value, index 2 the anchor text */
$aLinks = array_map(
    static function (array $aMatch)
    {
        return [ 'url' => $aMatch[1], 'name' => $aMatch[2] ];
    },
    $aMatches
);

/* nothing to check: stop here */
if (count($aLinks) === 0)
{
    die(' No links extracted from ' . $sFilename . DUB_EOL);
}

/* announce how many links were extracted before the check starts */
echo PHP_EOL, ' ', count($aLinks), ' links being checked ...', DUB_EOL;

/* the checker receives the full list of url/name pairs on construction */
$oChecker = new URLChecker2($aLinks);

/* summary: failed count, then verified count (total minus failures) */
echo PHP_EOL, ' ', $oChecker->getURLFails(), ' links failed';
echo PHP_EOL, ' ', ($oChecker->getURLTotal() - $oChecker->getURLFails()), ' links verified', DUB_EOL;