Login   Register  
PHP Classes
elePHPant
Icontem

File: php-mail-extractor.php

Recommend this page to a friend!
Stumble It! Stumble It! Bookmark in del.icio.us Bookmark in del.icio.us
  Classes of Vidar Vestnes  >  PHP Mail Extractor Script  >  php-mail-extractor.php  >  Download  
File: php-mail-extractor.php
Role: Application script
Content type: text/plain
Description: Extracts email addresses from files on your hard drive and saves them to a file.
Class: PHP Mail Extractor Script
Find and extract email addresses from files
Author: By Vidar Vestnes
Last change:
Date: 2013-04-04 16:29
Size: 3,976 bytes
 

Contents

Class file image Download
<?php
/**
 *
 * PHP Mail Extractor v1.0
 *
 * Scan your disk and store all found email-addresses to a file.
 *
 * Recursively scans all subfolders of the given start dir.
 *
 * Example of use from command prompt:
 * php php-mail-extractor.php "c:/myfolder" "c:/mail-list.txt"
 *
 *
 * By Vidar Vestnes 2013
 *
 * Require: PHP v4.4
 *
 */

/**
 * Fallback for PHP versions that do not ship file_put_contents().
 *
 * The shim is only declared when the native function is missing. It honours
 * the FILE_APPEND flag the same way the native function does: with the flag
 * the data is appended, without it the file is truncated first.
 */
if (!function_exists('file_put_contents'))
{
    // PHP versions lacking file_put_contents() also lack the FILE_APPEND
    // constant; 8 is the value used by the native implementation.
    if (!defined('FILE_APPEND'))
    {
        define('FILE_APPEND', 8);
    }

    /**
     * Write a string to a file.
     *
     * @param string $filename Path of the file to write to
     * @param string $data     Data to write
     * @param int    $flags    Pass FILE_APPEND to append instead of overwrite [Optional]
     * @return int|false Number of bytes written, or false when the file
     *                   could not be opened or written
     */
    function file_put_contents($filename, $data, $flags = 0)
    {
        // Append only when asked to; otherwise start from an empty file.
        $mode = ($flags & FILE_APPEND) ? 'ab' : 'wb';

        $f = @fopen($filename, $mode);
        if (!$f)
        {
            return false;
        }

        $bytes = fwrite($f, $data);
        fclose($f);

        return $bytes;
    }
}

class PhpMailExtractor
{
    /**
     * Scan a directory tree and append every email address found to a file.
     *
     * Every regular file smaller than 10000 bytes is read and searched with
     * a regular expression. Each match is lower-cased, echoed to standard
     * output and appended to $targetFile on its own line. Sub-directories
     * are processed recursively. Directories that cannot be opened and files
     * that cannot be read are skipped silently.
     *
     * If writing to $targetFile fails, an error message is printed and the
     * script terminates.
     *
     * Example of use from command prompt:
     * php php-mail-extractor.php "c:/myfolder" "c:/mail-list.txt"
     *
     * @param string  $dir             Directory where to start the scan
     * @param string  $targetFile      Filename where to store the result
     * @param boolean $flushTargetFile Delete the target file before scanning
     * @param array   $ignoreEmails    Lower-cased addresses (as array keys) to skip.
     *                                 Passed by reference: every address written is
     *                                 added as a key, which is how duplicates are
     *                                 avoided across files and recursion levels.
     */
    function extract($dir, $targetFile, $flushTargetFile, &$ignoreEmails)
    {
        if ($flushTargetFile && is_file($targetFile))
        {
            @unlink($targetFile);
        }

        // NOTE: the final group only accepts 2-3 letter TLDs, so an address
        // ending in a longer TLD is matched in truncated form.
        $regex = '/[_a-z0-9-]+(\.[_a-z0-9-]+)*@[a-z0-9-]+(\.[a-z0-9-]+)*(\.[a-z]{2,3})/i';

        // An unreadable or missing directory yields no handle. Bail out here:
        // calling readdir() without a valid handle never returns false on
        // older PHP versions (endless loop) and throws on PHP 8.
        $dh = @opendir($dir);
        if ($dh === false)
        {
            return;
        }

        // Collect the entries first and release the handle before recursing,
        // so only one directory handle is open at any time.
        $entries = array();
        while (false !== ($filename = readdir($dh)))
        {
            $entries[] = $filename;
        }
        closedir($dh);

        foreach ($entries as $value)
        {
            if ($value === '.' || $value === '..')
            {
                continue;
            }

            $path = $dir . DIRECTORY_SEPARATOR . $value;

            if (is_dir($path))
            {
                // Never flush in recursive calls: the file was already
                // cleared (if requested) by the outermost call.
                $this->extract($path, $targetFile, false, $ignoreEmails);
                continue;
            }

            if (!is_file($path) || filesize($path) >= 10000)
            {
                continue;
            }

            $content = @file_get_contents($path);
            if ($content === false)
            {
                // Unreadable file: nothing to search.
                continue;
            }

            $matches = array();
            preg_match_all($regex, $content, $matches);
            if (empty($matches[0]))
            {
                continue;
            }

            foreach ($matches[0] as $email)
            {
                // Avoid storing duplicates
                $lc = strtolower($email);
                if (isset($ignoreEmails[$lc]))
                {
                    continue;
                }

                echo $lc . "\n";

                $append = file_put_contents($targetFile, $lc . "\n", FILE_APPEND);
                if ($append)
                {
                    $ignoreEmails[$lc] = 1;
                }
                else
                {
                    echo "\n ERROR: Could not write to [outputfile]\n";
                    exit;
                }
            }
        }
    }
}



// Command-line entry point: expects [start-folder] and [outputfile].
// Without both arguments the usage text is printed and the script stops.
if (!isset($argv[1]) || !isset($argv[2]))
{
    echo "\n";
    echo "******************************************************\n";
    echo " PHP Mail Extractor v1.0\n";
    echo "******************************************************\n";
    echo "\n";
    echo " Scan your disk/folder recursivly and stores \n";
    echo " all found email-addresses to a file.\n";
    echo "\n";
    echo " Only files less than 10Kb is scanned.\n";
    echo "\n";
    echo " Target file will always be flushed at start.\n";
    echo "\n";
    echo " How to use:\n";
    echo "  php-email-extractor.exe [start-folder] [outputfile]\n";
    echo "\n";
    echo " Example:\n";
    echo "  php-email-extractor.exe \"c:\\myfolder\" \"c:\\mail-list.txt\"\n";
    echo "\n";
    echo "\n";
    echo "\n DISCLAIMER \n";
    echo " This software is free, but used with no warranties\n";
    echo " The developer can not be hold responsible for any damage,\n";
    echo " directly or indirectly for use of this software.\n";
    echo "\n";
    echo "\n";
    echo " By Vidar Vestnes - April 2013\n";
    echo "\n";
    exit;
}

if (!is_dir($argv[1]))
{
    echo " [start-folder] is not an accessable folder";
    // Stop here: continuing would delete the output file and then try to
    // scan a directory that does not exist.
    exit(1);
}

// Start with an empty ignore list; extract() fills it while scanning so
// each address is written only once. The target file is flushed first.
$arr = array();
$extractor = new PhpMailExtractor();
$extractor->extract($argv[1], $argv[2], true, $arr);