PHP Classes

File: crawler/crawler.php

Recommend this page to a friend!
  Classes of Faris AL-Otabi   Searchy PHP Search Engine   crawler/crawler.php   Download  
File: crawler/crawler.php
Role: Example script
Content type: text/plain
Description: Example script
Class: Searchy PHP Search Engine
Crawl, index and search multiple Web sites
Author: By Faris AL-Otabi
Last change:
Date: 1 year ago
Size: 1,562 bytes
 

Contents

Class file image Download
<?php

include_once '../vendor/autoload.php';
include_once
'../config.php';
include_once
'../src/Database.php';
include_once
'../src/SearchEngine.php';

// HTTP client shared by every request the crawler makes.
$httpClient = new \GuzzleHttp\Client();

// Load the seed list, one URL per line. file() opens, reads and closes the
// file in a single call, so no handle is left open, and it returns an empty
// array for an empty file (fread() rejects a zero-byte length).
$lines = file('urls.txt', FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);

if ($lines === false) {
    throw new \RuntimeException('Unable to read urls.txt');
}

// Trim every entry so a trailing "\r" from CRLF files or stray spaces never
// end up inside a URL, then drop entries that are blank after trimming.
$urls = array_values(array_filter(
    array_map('trim', $lines),
    static function ($line) {
        return $line !== '';
    }
));

// Rows collected for the search index, one per successfully fetched URL.
// Each row has the keys 'title', 'blurb', 'keywords' and 'url'.
$indexed_sites = [];

// Collect libxml parse warnings internally instead of emitting them, since
// real-world HTML is rarely well-formed. This is loop-invariant, so it is set
// once before crawling starts.
libxml_use_internal_errors(true);

// Returns the trimmed text of the last node in an XPath result, or '' when
// nothing matched. The last match wins, as with a plain overwrite loop.
$lastNodeText = static function ($nodes) {
    if (!$nodes instanceof \DOMNodeList || $nodes->length === 0) {
        return '';
    }

    return trim($nodes->item($nodes->length - 1)->textContent);
};

foreach ($urls as $url) {
    echo "Indexing {$url}" . PHP_EOL;

    // Guzzle reports connection failures, 4xx/5xx responses and malformed
    // URLs by throwing. One unreachable site must not abort the whole crawl
    // and lose everything gathered so far, so report it and move on.
    try {
        $response = $httpClient->get($url);
        $htmlString = (string) $response->getBody();
    } catch (\Exception $e) {
        echo "Skipping {$url}: {$e->getMessage()}" . PHP_EOL;
        continue;
    }

    $title = '';
    $description = '';
    $keyword = '';

    // DOMDocument::loadHTML() throws a ValueError on an empty string in
    // PHP 8, so an empty body skips parsing and falls back to the URL below.
    if ($htmlString !== '') {
        // Encode every non-ASCII character as a numeric entity so libxml does
        // not misread UTF-8 as ISO-8859-1. This replaces the deprecated
        // mb_convert_encoding(..., 'HTML-ENTITIES', 'UTF-8') call.
        $htmlString = mb_encode_numericentity(
            $htmlString,
            [0x80, 0x10FFFF, 0, 0x1FFFFF],
            'UTF-8'
        );

        $doc = new DOMDocument();
        $doc->loadHTML($htmlString);

        // Discard the buffered parse errors so they do not pile up in memory
        // over a long crawl.
        libxml_clear_errors();

        $xpath = new DOMXPath($doc);

        $title = $lastNodeText($xpath->evaluate('/html/head/title'));
        $description = $lastNodeText(
            $xpath->evaluate('/html/head/meta[@name="description"]/@content')
        );
        $keyword = $lastNodeText(
            $xpath->evaluate('/html/head/meta[@name="keywords"]/@content')
        );
    }

    // Pages with a missing or whitespace-only <title> are listed by URL.
    if ($title === '') {
        $title = $url;
    }

    $indexed_sites[] = [
        'title' => $title,
        'blurb' => $description,
        'keywords' => $keyword,
        'url' => $url,
    ];
}

// Persist the crawl results: build the database from the loaded config, hand
// it to the search engine, and pass along every collected site row in one call.
$searchEngine = new SearchEngine(new Database($config));
$searchEngine->addSites($indexed_sites);

echo "Indexing Finished", PHP_EOL;