PHP Classes

File: maildir2mbox.php

Recommend this page to a friend!
  Classes of Charalampos Pournaris   Maildir To Mailbox   maildir2mbox.php   Download  
File: maildir2mbox.php
Role: Class source
Content type: text/plain
Description: The PHP file that does all the work
Class: Maildir To Mailbox
Convert messages from maildir to mbox format
Author: By Charalampos Pournaris
Last change: Fixed a bug in fopen, w -> 'w' thanks to Dave Silvester
Date: 14 years ago
Size: 17,579 bytes
 

Contents

Class file image Download
<?php
/**
 * Maildir (kmail, qmail etc.) to MailBox (Thunderbird, kmail ..) conversion script
 *
 * I wrote this script because I needed to convert a quite big (~5GB) maildir directory structure
 * to an mbox file structure in order to open it from my thunderbird mail client. I was unable
 * to find a script/program which could work with such a big directory (or work at all) so I
 * wrote my own.
 *
 * You should invoke it this way: # php maildir2mbox.php Maildir (or Maildir/,
 * it's the same), where Maildir is your top level mail directory.
 *
 * If you add #!/usr/bin/php (the path where your php binary is located) on top of the script
 * and chmod it +x then you can invoke it with ./maildir2mbox.php Maildir/.
 *
 * A subdir named 'mboxfiles' will be created in the current working directory and all
 * the files will be placed there. Each mbox file is named according to the subdirectory
 * that the file belongs to. For example the mbox files for the Draft directory will be called
 * DraftX where X is a number for each file (if dir is bigger than 2 GB). An mbox file won't
 * exceed the signed 32-bit integer size limit (which is ~2GB) but will be split into more
 * files as described above.
 *
 * I hope this script will be useful to you :)
 *
 * UPDATED INFO (24-11-2008)
 * -------------------------
 *
 * I have made some changes to the script. Your maildir directory should look like this:
 *
 * mail/
 *   inbox/
 *     cur/
 *     new/
 *     tmp/
 *   sent/
 *     cur/
 *     new/
 *     tmp/
 *   other directory/
 *     cur/
 *     new/
 *     tmp/
 *
 * You should place the script in the same level as the mail directory (in the above example)
 * and use the command: php maildir2mbox.php mail (or mail/,
 * it's the same).
 *
 * All the mbox generated files will be placed in a directory called 'mboxfiles'.
 * The filenames should be inbox[.mbox], sent[.mbox] for the above example
 *
 * -- END OF THE UPDATED INFO SECTION --
 *
 * UPDATED INFO (26-11-2008)
 * -------------------------
 *
 * - No extension is added to the output files by default
 *
 * - Added support for subdirectory parsing (any level)
 *
 * - Added command line options:
 *     -e Will add an .mbox extension to the output files
 *     -x Will remove output files which are empty
 *     -h Will print a help output describing options
 *
 * -- END OF THE UPDATED INFO SECTION --
 *
 * @author Charalampos 'DiAvOl' Pournaris <charpour@gnet.gr>
 * @version 3.0
 * @package converter
 */

// flush on each output operation
ob_implicit_flush();

// Do not timeout this script
set_time_limit(0);

/********************** FUNCTION DEFINITIONS **********************/

/**
 * Print a message to the console. Die if iserror is true
 *
 * @param string  $msg     The message to print (a newline is appended)
 * @param boolean $iserror If it is true the script exits with status 1
 */
function printConsole($msg, $iserror=false)
{
    print($msg."\n");
    if ($iserror) {
        exit(1);
    }
}

/**
 * Print script usage information and terminate the script
 *
 * Reads the global $program_name set by the main code.
 *
 * @param boolean $error When true the exit status is 1, otherwise 0
 */
function print_usage($error=false)
{
    global $program_name;

    print("Usage: $program_name [options] Maildir\n"
        ."  -h   Print this help information\n"
        ."  -e   Add an .mbox extension to the output files\n"
        ."  -x   Remove empty mbox files\n\n");
    exit($error ? 1 : 0);
}

/**
 * Check if an int variable is overflowed
 *
 * PHP silently converts an integer result that exceeds PHP_INT_MAX to a
 * float, so any non-integer value is reported as overflowed.
 *
 * @param mixed $val The result of an integer computation
 * @return boolean True when $val is no longer of type integer
 */
function intOverflowed($val)
{
    return !is_int($val);
}

/**
 * Determines if the file is a maildir file
 *
 * The (whitespace-trimmed) name is matched against the FILE_FORMAT pattern.
 *
 * @param string $name File name to check
 * @return boolean
 */
function isMaildirFile($name)
{
    return preg_match(FILE_FORMAT, trim($name)) === 1;
}

/**
 * Change the default memory limit
 * <i>NOTE: If the new size is less than 8M it will be set to 8M</i>
 *
 * @param string $newlimit The new limit to be set; updated in place when it is raised to 8M
 * @return string|boolean The previous limit, or false if the limit could not be changed
 */
function setMemoryLimit(&$newlimit)
{
    if (intval($newlimit) < 8) {
        $newlimit = '8M';
    }

    // ini_set() already returns the old value on success and false on failure
    return ini_set('memory_limit', $newlimit);
}

/**
 * Parse the given directory.
 *
 * Optionally get all the hidden files too and ignore
 * directory names listed in the exception array.
 *
 * @param string  $dir        Directory path; must end with the directory separator
 * @param boolean $gethidden  Whether entries starting with '.' are collected
 * @param array   $exceptions Directory names to leave out
 * @return array  Keys 'dirs', 'hdirs', 'files', 'hfiles'; each maps entry name => path
 *                (directory paths carry a trailing separator)
 */
function parseDirectory($dir, $gethidden=false, $exceptions=array())
{
    $dirs   = array();
    $hdirs  = array();
    $files  = array();
    $hfiles = array();

    $fp = opendir($dir);
    if ($fp) {
        while (false !== ($file = readdir($fp))) {
            if ($file === '.' || $file === '..') {
                continue;
            }

            $hidden = ($file[0] === '.');
            if ($hidden && !$gethidden) {
                continue;
            }

            $filename = $dir.$file;
            if (is_file($filename)) {
                if ($hidden) {
                    $hfiles[$file] = $filename;
                } else {
                    $files[$file] = $filename;
                }
            } elseif (is_dir($filename)) {
                if (in_array($file, $exceptions, true)) {
                    continue;
                }
                if ($hidden) {
                    $hdirs[$file] = $filename.DIR_SEPERATOR;
                } else {
                    $dirs[$file] = $filename.DIR_SEPERATOR;
                }
            }
        }
        closedir($fp);
    }

    return array('dirs'=>$dirs, 'hdirs'=>$hdirs, 'files'=>$files, 'hfiles'=>$hfiles);
}

/**
 * Parses a directory tree using recursion
 *
 * Every visible sub-directory of $path becomes a child of $root. A hidden
 * directory named ".<parent>.<suffix>" is treated as the container of the
 * sub-folders of the child called <parent> and is parsed recursively.
 *
 * @param DirectoryObj $root Node that receives the children
 * @param string       $path Directory to scan (with trailing separator)
 */
function parseTree($root, $path)
{
    $dirtree = parseDirectory($path, true, array(OUTPUT_DIR));

    foreach ($dirtree['dirs'] as $dname=>$dpath) {
        $mboxdir = new DirectoryObj($dname, $dpath, $root->getMboxPath().'/'.$dname.'.sbd');
        $root->addChild($mboxdir);
    }

    foreach ($dirtree['hdirs'] as $hdname=>$hdpath) {
        // The parent name sits between the leading dot and the next dot
        $end = strpos($hdname, '.', 1);
        $parent = ($end === false) ? null : $root->getDirByName(substr($hdname, 1, $end - 1));

        if ($parent === null) {
            // e.g. ".git", or a container whose folder does not exist: nothing to attach it to
            printConsole("Skipping hidden directory with no matching folder: ".$hdpath);
            continue;
        }
        parseTree($parent, $hdpath);
    }
}

/**
 * Generate the target mbox files
 *
 * Creates the output directory of $root when it has children, converts each
 * child directory with a DirParser and then recurses into that child.
 *
 * @param DirectoryObj $root
 */
function makeTree($root)
{
    if ($root->hasChild()) {
        $target = $root->getMboxPath();
        // The directory may be left over from a previous run
        if (!is_dir($target) && !mkdir($target, 0777, true)) {
            printConsole("Error occured while creating directory: ".$target, true);
        }
    }

    foreach ($root->getChild() as $chobj) {
        printConsole('******************* Processing directory '.$chobj->getFullPath().' *******************');

        $dir = new DirParser($chobj->getFullPath(), $chobj->getMboxPath());
        $dir->parse();
        if ($dir->getFileCount() == 0) {
            printConsole("No files found in this directory..");
        } else {
            $dir->printSummary();
        }
        // Release the directory handle before descending
        unset($dir);

        printConsole('*************************** Finished ***************************');
        printConsole("\n");

        makeTree($chobj);
    }
}

/********************** CLASS DEFINITIONS **********************/

/**
 * A node of the mail folder tree: a source maildir folder together with the
 * path under which its sub-folders are written.
 */
class DirectoryObj
{
    private $name = '';
    private $fullpath = '';
    private $mboxpath = '';
    private $child = array();

    /**
     * @param string $name     Folder name
     * @param string $fullpath Path of the source folder (with trailing separator)
     * @param string $mboxpath Output path associated with this folder
     */
    public function __construct($name, $fullpath, $mboxpath)
    {
        $this->name = $name;
        $this->fullpath = $fullpath;
        $this->mboxpath = $mboxpath;
    }

    public function getName()
    {
        return $this->name;
    }

    public function getFullPath()
    {
        return $this->fullpath;
    }

    public function getMboxPath()
    {
        return $this->mboxpath;
    }

    public function setMboxPath($p)
    {
        $this->mboxpath = $p;
    }

    /**
     * Add a child directory (replaces an existing child with the same name)
     *
     * @param DirectoryObj $child
     */
    public function addChild($child)
    {
        $this->child[$child->getName()] = $child;
    }

    /**
     * Returns all the child elements
     *
     * @return array Child name => DirectoryObj
     */
    public function getChild()
    {
        return $this->child;
    }

    /**
     * Returns a child directory given its name
     *
     * @param string $name
     * @return DirectoryObj|null Null when no child has that name
     */
    public function getDirByName($name)
    {
        return isset($this->child[$name]) ? $this->child[$name] : null;
    }

    public function hasChild()
    {
        return !empty($this->child);
    }
}

/**
 * Converts the messages found in the sub-directories of one maildir folder
 * into one or more mbox files.
 */
class DirParser
{
    /** Largest size an mbox file may reach before a new one is started (2^31 - 1) */
    const MAX_MBOX_SIZE = 2147483647;

    private $fulldir = '';
    private $dirname = '';
    private $mdirfiles = 0;
    private $parsedfiles = 0;
    private $bogusfiles = 0;
    private $output_fn = '';
    private $dirhandle = null;
    private $mboxhnd = null;
    private $mboxfn = null;
    private $mboxfilecnt = 1;
    private $totalsize = 0;
    private $chunks = 0;

    private static $total_converted = 0;
    private static $total_failed = 0;

    /**
     * @param string $fdir   Source folder (with trailing separator)
     * @param string $output Output path of the folder; the mbox file is
     *                       created in the directory part of this path
     */
    public function __construct($fdir, $output)
    {
        $this->fulldir = $fdir;
        $this->output_fn = substr($output, 0, strrpos($output, '/')+1);

        $this->dirhandle = opendir($fdir);
        if (!$this->dirhandle) {
            printConsole("Error occured while opening directory: ".$fdir, true);
        }
        // Last path component, ignoring the trailing separator
        $this->dirname = basename(rtrim($fdir, DIR_SEPERATOR));
    }

    /**
     * Open the current mbox chunk for writing; exits the script on failure
     */
    private function openFile()
    {
        global $mbox_extension;

        $suffix = ($this->mboxfilecnt == 1) ? '' : $this->mboxfilecnt;
        $this->mboxfn = $this->output_fn.$this->dirname.$suffix.($mbox_extension ? '.mbox' : '');

        $this->mboxhnd = fopen($this->mboxfn, 'w');
        if (!$this->mboxhnd) {
            printConsole("Error occured while opening mbox file for writing", true);
        }
    }

    /**
     * Close the current mbox chunk and, with the -x option, delete it when
     * no message was written to it
     */
    private function closeFile()
    {
        global $remove_empty;

        if (is_resource($this->mboxhnd)) {
            fclose($this->mboxhnd);
        }
        $this->mboxhnd = null;

        // parsedfiles (not mdirfiles): a folder holding only skipped messages is empty too
        if ($remove_empty && $this->parsedfiles == 0 && file_exists($this->mboxfn)) {
            unlink($this->mboxfn);
        }
    }

    /**
     * Convert every maildir file found in the sub-directories of the folder
     */
    public function parse()
    {
        $this->openFile();

        while (false !== ($rfile = readdir($this->dirhandle))) {
            $subdir = $this->fulldir.$rfile;
            if ($rfile == '..' || $rfile == '.' || !is_dir($subdir)) {
                continue;
            }

            $curhandle = opendir($subdir);
            if (!$curhandle) {
                printConsole("[Error] Could not open directory: ".$subdir);
                continue; // nothing can be read from a failed handle
            }

            while (false !== ($file = readdir($curhandle))) {
                $filename = $subdir.'/'.$file;
                if (!is_file($filename) || !isMaildirFile($file)) {
                    continue;
                }

                print("\rFilecount: ".(++$this->mdirfiles)."\tTotal size written to mbox file: ".$this->totalsize);

                $mdfile = new MailDirFile($filename);
                $mdfile->readHeader();

                if (!$mdfile->foundAddress()) {
                    printConsole("\nHeader not found for : ".$file.", file skipped");
                    $this->bogusfiles++;
                    unset($mdfile);
                    continue;
                }

                $msgsize = $mdfile->getTotalSize();
                $newsize = $this->totalsize + $msgsize;

                // Start a new chunk before the current file grows past the limit.
                // The explicit bound is needed because integers do not overflow
                // at 2GB on 64-bit builds. Never roll over an empty chunk.
                if ($this->totalsize > 0 && (intOverflowed($newsize) || $newsize > self::MAX_MBOX_SIZE)) {
                    $this->closeFile();
                    $this->mboxfilecnt++;
                    $this->chunks++;
                    $this->totalsize = 0;
                    print("\n\t\t\t\t\r");
                    $this->openFile();
                }

                ++$this->parsedfiles;
                $this->totalsize += $msgsize;

                fwrite($this->mboxhnd, $mdfile->getMboxHeader()."\n".$mdfile->getHeader());
                $mdfile->streamWriteContents($this->mboxhnd);
                fwrite($this->mboxhnd, "\n");

                unset($mdfile);
            }

            closedir($curhandle);
        }

        $this->closeFile();
    }

    /**
     * @return integer Number of maildir files seen (converted or skipped)
     */
    public function getFileCount()
    {
        return $this->mdirfiles;
    }

    /**
     * Print the per-directory summary and add its numbers to the global totals
     */
    public function printSummary()
    {
        printConsole("\n".'------------------------ SUMMARY ------------------------');
        printConsole("Wrote ".$this->mboxfilecnt." mbox files for this directory");
        printConsole("Converted ".$this->parsedfiles.'/'.$this->mdirfiles." files successfully");
        printConsole("Found ".$this->bogusfiles." files without a proper header and skipped them for this directory");
        printConsole('---------------------------------------------------------');

        self::$total_converted += $this->parsedfiles;
        self::$total_failed += $this->bogusfiles;
    }

    /**
     * Static because the main code calls it as DirParser::getTotalConverted()
     *
     * @return integer Messages converted by all summarized parsers
     */
    public static function getTotalConverted()
    {
        return self::$total_converted;
    }

    /**
     * Static because the main code calls it as DirParser::getTotalFailed()
     *
     * @return integer Messages skipped by all summarized parsers
     */
    public static function getTotalFailed()
    {
        return self::$total_failed;
    }

    public function getDirName()
    {
        return $this->dirname;
    }

    public function __destruct()
    {
        if (is_resource($this->dirhandle)) {
            closedir($this->dirhandle);
        }
        $this->dirhandle = null;
    }
}

/**
 * A single maildir message file: reads its header, builds the mbox "From "
 * separator line and streams the body into an mbox file.
 */
class MailDirFile
{
    /** Date layout used in the mbox separator line */
    const DATE_FORMAT = 'D M d H:i:s Y';

    /** Sender used in the separator line when no address could be extracted */
    const UNKNOWN_SENDER = 'MAILER-DAEMON';

    private $filename = '';
    private $filesize = 0;
    private $handler = null;
    private $mboxheader = 'From ';
    private $from = '';
    private $dt = '';
    private $header = '';
    private $contents = '';
    private $foundaddr = false;
    private $fullmsg = '';

    // How often each sender header was seen, over all files
    private static $search_array = array(
        "From:"=>0,
        "X-From-Line:"=>0,
        "Reply-To:"=>0,
        "from:"=>0
    );

    // Address extracted from each sender header of this file
    private $search_array_filled = array(
        "From:"=>'',
        "X-From-Line:"=>'',
        "Reply-To:"=>'',
        "from:"=>''
    );

    /**
     * Opens the file (exits the script on failure) and records its size
     *
     * @param string $fname Path of the maildir file
     */
    public function __construct($fname)
    {
        $this->filename = $fname;
        // Default to a formatted epoch date so the separator line stays
        // well-formed when the message has no usable Date header
        $this->dt = gmdate(self::DATE_FORMAT, 0);

        if (!$this->handler = fopen($fname, "r")) {
            printConsole("Error occured while opening file: ".$fname, true);
        }
        $this->getFileSize();
    }

    public static function getSearchArray()
    {
        return self::$search_array;
    }

    /**
     * Reduce a header token to the bare address
     *
     * @param string $addr Token containing an '@'
     * @return string The text inside <...> when present, otherwise the token
     *                without surrounding whitespace and trailing list separators
     */
    private function cleanAddress($addr)
    {
        $addr = trim($addr);
        if (preg_match('/<([^<>]+)>/', $addr, $match)) {
            return $match[1];
        }
        return rtrim($addr, ',;');
    }

    /**
     * Read the rest of the file into memory
     */
    public function readContents()
    {
        while (!feof($this->handler)) {
            $this->contents .= fread($this->handler, 8192);
        }
    }

    /**
     * Copy the rest of the file to $fp without loading it into memory
     *
     * Lines starting with "From " are written as ">From " so that mbox
     * readers do not mistake them for the start of a new message.
     *
     * @param resource $fp Handle of the mbox file
     */
    public function streamWriteContents($fp)
    {
        $atLineStart = true;
        while (($chunk = fgets($this->handler, 8192)) !== false) {
            if ($atLineStart && strncmp($chunk, 'From ', 5) === 0) {
                fwrite($fp, '>');
            }
            fwrite($fp, $chunk);
            // A chunk without a trailing newline is a partial (very long) line
            $atLineStart = (substr($chunk, -1) === "\n");
        }
    }

    /**
     * Read the header block and build the mbox separator line
     *
     * Stops at the first blank line (LF or CRLF) or at the end of the file.
     * The sender is taken from From:, from:, Reply-To: or X-From-Line:, in
     * that order of preference; the date comes from the Date: header.
     */
    public function readHeader()
    {
        while (($line = fgets($this->handler)) !== false && rtrim($line, "\r\n") !== '') {
            $foundfrom = false;

            foreach (array_keys(self::$search_array) as $search) {
                if (!$this->startsWith($search, $line)) {
                    continue;
                }
                $foundfrom = true;
                $this->foundaddr = true;
                self::$search_array[$search]++;

                // The first token holding an '@' is taken as the address
                foreach (explode(' ', $line) as $item) {
                    if (strpos($item, '@') !== false) {
                        $this->search_array_filled[$search] = $this->cleanAddress($item);
                        break;
                    }
                }
                break;
            }

            if (!$foundfrom && $this->startsWith("Date:", $line)) {
                $stamp = strtotime(trim(substr($line, 5)));
                if ($stamp !== false) {
                    $this->dt = gmdate(self::DATE_FORMAT, $stamp);
                }
            }

            $this->header .= $line;
        }

        // Keep the blank separator line; $line is false when the file ended first
        if ($line !== false) {
            $this->header .= $line;
        }

        $this->from = '';
        foreach (array('From:', 'from:', 'Reply-To:', 'X-From-Line:') as $key) {
            if (!empty($this->search_array_filled[$key])) {
                $this->from = $this->search_array_filled[$key];
                break;
            }
        }
        if (empty($this->from)) {
            $this->from = self::UNKNOWN_SENDER;
        }

        $this->mboxheader .= $this->from.' '.$this->dt;
    }

    /**
     * @return boolean True when one of the sender headers was present
     */
    public function foundAddress()
    {
        return $this->foundaddr;
    }

    public function printContents()
    {
        print($this->contents);
    }

    public function printHeader()
    {
        print($this->header);
    }

    public function getMboxHeader()
    {
        return $this->mboxheader;
    }

    public function getHeader()
    {
        return $this->header;
    }

    public function getContents()
    {
        return $this->contents;
    }

    public function getMboxMessage()
    {
        $this->fullmsg = $this->mboxheader."\n".$this->header.$this->contents."\n";
        return $this->fullmsg;
    }

    /**
     * Size this message occupies in the mbox file: the file itself plus the
     * separator line and the two newlines written around the message
     *
     * Does not modify the object, so it may be called repeatedly.
     *
     * @return integer
     */
    public function getTotalSize()
    {
        return $this->filesize + strlen($this->mboxheader) + 2;
    }

    /**
     * Store the size of the file and rewind the handle
     */
    private function getFileSize()
    {
        fseek($this->handler, 0, SEEK_END);
        $this->filesize = ftell($this->handler);
        fseek($this->handler, 0, SEEK_SET);
    }

    /**
     * @param string $str  Prefix to look for
     * @param string $line Line to inspect
     * @return boolean True when $line begins with $str (case-sensitive)
     */
    private function startsWith($str, $line)
    {
        return strncmp($line, $str, strlen($str)) === 0;
    }

    /**
     * @return integer Size of the maildir file in bytes
     */
    public function getSize()
    {
        return $this->filesize;
    }

    public function __destruct()
    {
        if (is_resource($this->handler)) {
            fclose($this->handler);
        }
        $this->handler = null;
    }
}

/********************** MAIN CODE **********************/

define('DIR_SEPERATOR','/');
define('OUTPUT_DIR','mboxfiles');
define('FILE_FORMAT',"/^[0-9]*\.[a-zA-Z0-9_]*\.[a-zA-Z0-9\:\,]*/");

$mbox_extension = false;
$remove_empty = false;
$program_name = $argv[0];

foreach ($argv as $k=>$argument) {
    $option = trim($argument);

    switch ($option) {
        case '-e':
            $mbox_extension = true;
            unset($argv[$k]);
            break;

        case '-x':
            $remove_empty = true;
            unset($argv[$k]);
            break;

        case '-h':
            print_usage(); // terminates the script
            break;

        default:
            // Index 0 is the script name, not an option
            if ($k > 0 && $option !== '' && $option[0] === '-') {
                printConsole("Unknown Option: $argument");
                print_usage(true);
            }
    }
}

// Re-index argv
$argv = array_values($argv);
$argc = count($argv);

if ($argc != 2) {
    printConsole("Expected exactly one maildir directory argument");
    print_usage(true);
}

$maildir = $argv[1];

if ($maildir === '') {
    printConsole("The maildir directory name is empty", true);
}

// Check if the last character of the given directory is the dir seperator, if not add it.
if (substr($maildir, -1) != DIR_SEPERATOR) {
    $maildir .= DIR_SEPERATOR;
}

// If the given name is a file instead of a directory kill the script
if (!is_dir($maildir)) {
    printConsole("$maildir is not a directory", true);
}

// Variable initialization
$memory_limit = '32M';
$oldlimit = '';

printConsole('Setting memory limit...');
if ( ($oldlimit = setMemoryLimit($memory_limit)) === false ) {
    printConsole("Error setting memory limit to $memory_limit.", true);
} else {
    printConsole("New memory limit: $memory_limit\nOld memory limit: $oldlimit");
}

// The root node is named after the last component of the given path
$rootnode = new DirectoryObj(basename(rtrim($maildir, DIR_SEPERATOR)), $maildir, OUTPUT_DIR);
parseTree($rootnode, $rootnode->getFullPath());
makeTree($rootnode);

printConsole("Finished processing all the directories.\nOutput files are located in directory: ".OUTPUT_DIR."/");
printConsole("Total files converted: ".DirParser::getTotalConverted());
printConsole("Total files failed: ".DirParser::getTotalFailed());