<?php
/**
* Maildir (kmail, qmail etc.) to MailBox (Thunderbird, kmail, ...) conversion script
*
* I wrote this script because I needed to convert a quite big (~5GB) maildir directory structure
* to an mbox file structure in order to open it from my thunderbird mail client. I was unable
* to find a script/program which could work with such a big directory (or work at all) so I
* wrote my own.
*
* You should invoke it this way: # php maildir2mbox.php Maildir (or Maildir/,
* it's the same), where Maildir is your top level mail directory.
*
* If you add #!/usr/bin/php (the path where your php binary is located) on top of the script
* and chmod it +x then you can invoke it with ./maildir2mbox.php Maildir/.
*
* A subdir named 'mboxfiles' will be created in the same directory where the script is and all
* the files will be placed there. Each mbox file is named according to the subdirectory
* that the file belongs to. For example the mbox files for the Draft directory will be called
* DraftX where X is a number for each file (if dir is bigger than 2 GB). An mbox file won't
* overflow the signed integer size limit (which is ~2GB) but will be split into more files as
* described above.
*
* I hope this script will be useful to you :)
*
* UPDATED INFO (24-11-2008)
* -------------------------
*
* I have made some changes to the script. Your maildir directory should look like this:
*
* mail/
* inbox/
* cur/
* new/
* tmp/
* sent/
* cur/
* new/
* tmp/
* other directory/
* cur/
* new/
* tmp/
*
* You should place the script in the same level as the mail directory (in the above example)
* and use the command: php maildir2mbox.php mail (or mail/,
* it's the same).
*
* All the mbox generated files will be placed in a directory called 'mboxfiles'. The filenames should
* be inbox[.mbox], sent[.mbox] for the above example
*
* -- END OF THE UPDATED INFO SECTION --
*
* UPDATED INFO (26-11-2008)
* -------------------------
*
* - No extension is added to the output files by default
*
* - Added support for subdirectory parsing (any level)
*
* - Added three command line options:
* -e Will add an .mbox extension to the output files
* -x Will remove output files which are empty
* -h Will print a help output describing options
*
* -- END OF THE UPDATED INFO SECTION --
*
* @author Charalampos 'DiAvOl' Pournaris <charpour@gnet.gr>
* @version 3.0
* @package converter
*/
// Lift PHP's execution time limit (0 = no limit) so long conversions are not aborted
set_time_limit(0);
// Turn implicit flushing on: every output operation is flushed immediately
ob_implicit_flush();
/********************** FUNCTION DEFINITIONS **********************/
/**
 * Write a message, followed by a newline, to the script output.
 *
 * When $iserror is true the script terminates with exit status 1
 * right after the message has been written.
 *
 * @param string $msg The text to print
 * @param boolean $iserror Whether to abort the script after printing
 */
function printConsole($msg, $iserror=false) {
    echo $msg."\n";

    // Normal messages simply return to the caller
    if (!$iserror) {
        return;
    }

    exit(1);
}
/**
 * Print the command line help text and terminate the script.
 *
 * The program name shown in the text is read from the global $program_name.
 *
 * @param boolean $error When true the script exits with status 1, otherwise with status 0
 */
function print_usage($error=false) {
    global $program_name;

    $lines = array(
        'Usage: '.$program_name.' [options] Maildir',
        '-h Print this help information',
        '-e Add an .mbox extension to the output files',
        '-x Remove empty mbox files',
    );

    // The help text ends with two line breaks
    print(implode("\n", $lines)."\n\n");

    if ($error) {
        exit(1);
    }
    exit(0);
}
/**
 * Tell whether a value has left the integer domain.
 *
 * PHP turns an integer result that exceeds the platform's integer range
 * into a float, so any value that is not of type integer is reported
 * as overflowed.
 *
 * @param mixed $val The value to inspect (normally the result of an integer sum)
 * @return boolean false when $val is an integer, true otherwise
 */
function intOverflowed($val) {
    return !is_int($val);
}
/**
 * Check a file name against the maildir naming pattern.
 *
 * Surrounding whitespace is removed from the name before it is matched
 * against the FILE_FORMAT regular expression.
 *
 * @param string $name File name to check
 * @return int|false The preg_match() result: 1 on a match, 0 otherwise, false on a regex error
 */
function isMaildirFile($name) {
    $candidate = trim($name);
    $matched = preg_match(FILE_FORMAT, $candidate);

    return $matched;
}
/**
 * Change PHP's memory_limit setting, enforcing a floor of 8M.
 *
 * The requested limit is compared in bytes, honouring the K, M and G
 * suffixes of PHP's shorthand notation, so that e.g. '1G' is not mistaken
 * for a value below 8M. The special value -1 (no limit) is passed through
 * untouched. Any other request below 8M is replaced by '8M'; because
 * $newlimit is taken by reference, the caller sees the value that was
 * actually handed to ini_set().
 *
 * @param string $newlimit The new limit to be set (rewritten to '8M' when too small)
 * @return string|boolean The previous memory_limit value, or false when ini_set() failed
 */
function setMemoryLimit(&$newlimit) {
    $floor = 8 * 1024 * 1024;
    $requested = trim((string) $newlimit);

    // -1 means "unlimited" and must never be clamped
    if ($requested !== '-1' && memoryLimitToBytes($requested) < $floor) {
        $newlimit = '8M';
    }

    // ini_set() already returns the old value on success and false on failure
    return ini_set('memory_limit', $newlimit);
}

/**
 * Convert a PHP shorthand size ('512K', '32M', '1G' or a plain byte count) to bytes.
 *
 * @param string $size The size in shorthand notation
 * @return int|float Number of bytes; 0 for an empty or non-numeric value
 */
function memoryLimitToBytes($size) {
    $size = trim((string) $size);
    if ($size === '') {
        return 0;
    }

    // The cast keeps the leading number and ignores the unit suffix
    $bytes = (int) $size;

    // Each case deliberately falls through to apply the smaller units too
    switch (strtolower(substr($size, -1))) {
        case 'g':
            $bytes *= 1024;
        case 'm':
            $bytes *= 1024;
        case 'k':
            $bytes *= 1024;
    }

    return $bytes;
}
/**
 * List the entries of a single directory, grouped by kind.
 *
 * Entries are sorted into four buckets: visible directories, hidden
 * directories, visible files and hidden files ("hidden" meaning the name
 * starts with a dot). The special entries '.' and '..' are never reported.
 * Every bucket maps the entry name to $dir concatenated with that name;
 * directory paths additionally get DIR_SEPERATOR appended.
 *
 * $dir is concatenated with the entry names as-is, so it is expected to
 * end with a directory separator. When the directory cannot be opened all
 * four buckets are returned empty.
 *
 * @param string $dir Directory to read (with trailing separator)
 * @param boolean $gethidden Whether entries starting with a dot are reported
 * @param array $exceptions Directory names that must be left out of the result
 * @return array Array with the keys 'dirs', 'hdirs', 'files' and 'hfiles'
 */
function parseDirectory($dir, $gethidden=false, $exceptions=array()) {
    $dirs = array();
    $hdirs = array();
    $files = array();
    $hfiles = array();

    $fp = opendir($dir);
    if ($fp) {
        while (false !== ($file = readdir($fp))) {
            if ($file == '.' || $file == '..') {
                continue;
            }

            // Square-bracket offsets: the {0} form was removed in PHP 8
            $ishidden = ($file[0] == '.');
            if ($ishidden && !$gethidden) {
                continue;
            }

            $filename = $dir.$file;
            if (is_file($filename)) {
                if ($ishidden) {
                    $hfiles[$file] = $filename;
                } else {
                    $files[$file] = $filename;
                }
            } elseif (is_dir($filename)) {
                // Strict lookup so that numeric-looking names are not matched loosely
                if (in_array($file, $exceptions, true)) {
                    continue;
                }
                if ($ishidden) {
                    $hdirs[$file] = $filename.DIR_SEPERATOR;
                } else {
                    $dirs[$file] = $filename.DIR_SEPERATOR;
                }
            }
        }
        closedir($fp);
    }

    return array('dirs'=>$dirs,'hdirs'=>$hdirs,'files'=>$files,'hfiles'=>$hfiles);
}
/**
 * Build the directory tree below $root by scanning $path recursively.
 *
 * Every visible sub-directory of $path becomes a child of $root whose mbox
 * path is "<mbox path of $root>/<name>.sbd". A hidden directory named
 * ".<parent>.<suffix>" is taken to hold the sub-folders of the sibling
 * directory <parent> (presumably KMail's ".<folder>.directory" layout -
 * confirm) and is scanned recursively with that sibling as the new root.
 *
 * Hidden directories that do not follow this naming scheme, or whose
 * parent folder does not exist, are skipped with a console note; recursing
 * into them would otherwise end in a method call on null.
 *
 * The directory named 'mboxfiles' is always excluded from the scan.
 *
 * @param DirectoryObj $root Node that receives the directories found in $path
 * @param string $path Directory to scan (with trailing separator)
 */
function parseTree($root, $path) {
    $dirtree = parseDirectory($path, true, array('mboxfiles'));

    foreach ($dirtree['dirs'] as $dname=>$dpath) {
        $root->addChild(new DirectoryObj($dname, $dpath, $root->getMboxPath().'/'.$dname.'.sbd'));
    }

    $children = $root->getChild();
    foreach ($dirtree['hdirs'] as $hdname=>$hdpath) {
        // The parent name is the text between the leading dot and the next dot
        $dotpos = strpos($hdname, '.', 1);
        $parent_name = ($dotpos === false) ? '' : substr($hdname, 1, $dotpos - 1);

        if ($parent_name === '' || !isset($children[$parent_name])) {
            printConsole('Skipping hidden directory '.$hdpath.' (no matching parent folder)');
            continue;
        }

        parseTree($children[$parent_name], $hdpath);
    }
}
/**
 * Generate the target mbox files for every directory below $root.
 *
 * When $root has children, its mbox path is created as a directory first
 * (an already existing directory is reused, so the script can be re-run).
 * Each child is then converted with a DirParser and the function recurses
 * into the child to handle its own sub-folders.
 *
 * The script is aborted when the output directory cannot be created.
 *
 * @param DirectoryObj $root Node whose children are converted
 */
function makeTree($root) {
    if ($root->hasChild()) {
        $target = $root->getMboxPath();
        // Only create what is missing; a failed mkdir() would make every later fopen() fail
        if (!is_dir($target) && !mkdir($target)) {
            printConsole('Could not create output directory: '.$target, true);
        }
    }

    foreach ($root->getChild() as $chobj) {
        printConsole('******************* Processing directory '.$chobj->getFullPath().' *******************');

        $dir = new DirParser($chobj->getFullPath(), $chobj->getMboxPath());
        $dir->parse();
        if ($dir->getFileCount() == 0) {
            printConsole("No files found in this directory..");
        } else {
            $dir->printSummary();
        }
        // Destroy the parser now so its directory handle is closed before descending
        unset($dir);

        printConsole('*************************** Finished ***************************');
        printConsole("\n");

        makeTree($chobj);
    }
}
/********************** CLASS DEFINITIONS **********************/
/**
 * Node of the mail directory tree.
 *
 * A node stores the directory name, the path of the source directory, the
 * path used for the generated mbox output and the child nodes indexed by
 * their name.
 */
class DirectoryObj {
    private $name = '';
    private $fullpath = '';
    private $mboxpath = '';
    private $child = array();

    /**
     * @param string $name Directory name
     * @param string $fullpath Path of the source directory
     * @param string $mboxpath Path used for the mbox output of this node
     */
    public function __construct($name, $fullpath, $mboxpath) {
        $this->name = $name;
        $this->fullpath = $fullpath;
        $this->mboxpath = $mboxpath;
    }

    public function getName() {
        return $this->name;
    }

    public function getFullPath() {
        return $this->fullpath;
    }

    public function getMboxPath() {
        return $this->mboxpath;
    }

    public function setMboxPath($p) {
        $this->mboxpath = $p;
    }

    /**
     * Add a child directory. A child with the same name is replaced.
     *
     * @param DirectoryObj $child
     */
    public function addChild($child) {
        $this->child[$child->getName()] = $child;
    }

    /**
     * Returns all the child elements
     *
     * @return array Child nodes indexed by their name
     */
    public function getChild() {
        return $this->child;
    }

    /**
     * Returns a child directory given its name
     *
     * @param string $name
     * @return DirectoryObj|null The child, or null when no child has that name
     */
    public function getDirByName($name) {
        // isset() avoids the "undefined index" warning for unknown names
        return isset($this->child[$name]) ? $this->child[$name] : null;
    }

    /**
     * @return boolean true when at least one child has been added
     */
    public function hasChild() {
        return !empty($this->child);
    }
}
/**
 * Converts the messages of one mail folder into one or more mbox files.
 *
 * The parser walks every sub-directory of the folder, picks the files
 * whose name matches the maildir pattern and appends them to an mbox file
 * named after the folder. When adding a message would push the running
 * byte count out of the integer range, the current mbox file is closed and
 * a new, numbered one is started.
 *
 * The class also keeps script-wide totals of converted and skipped
 * messages, which are updated by printSummary().
 */
class DirParser {
    private $fulldir = '';
    private $dirname = '';
    private $mdirfiles = 0;
    private $parsedfiles = 0;
    private $bogusfiles = 0;
    private $output_fn = '';
    private $dirhandle = null;
    private $mboxhnd = null;
    private $mboxfn = null;
    private $mboxfilecnt = 1;
    private $totalsize = 0;
    private $chunks = 0;
    private static $total_converted = 0;
    private static $total_failed = 0;

    /**
     * Open the folder and derive the output location.
     *
     * The script is aborted when the folder cannot be opened.
     *
     * @param string $fdir Folder to convert (with trailing separator)
     * @param string $output Mbox path of the folder; everything up to and including
     *                       its last '/' is used as the output directory
     */
    public function __construct($fdir, $output) {
        $this->fulldir = $fdir;
        $this->output_fn = substr($output, 0, strrpos($output, '/')+1);

        $this->dirhandle = opendir($fdir);
        if (!$this->dirhandle) {
            printConsole("Error occured while opening directory: ".$fdir, true);
        }

        // The folder name is the last path component
        $trimmed = rtrim($fdir, DIR_SEPERATOR);
        $cut = strrpos($trimmed, DIR_SEPERATOR);
        $this->dirname = ($cut === false) ? $trimmed : substr($trimmed, $cut + 1);
    }

    /**
     * Open the current output file for writing.
     *
     * The name is "<output dir><folder name>", followed by the chunk number
     * for every file after the first and by '.mbox' when the global
     * $mbox_extension flag is set. The script is aborted on failure.
     */
    private function openFile() {
        global $mbox_extension;

        $suffix = ($this->mboxfilecnt == 1) ? '' : $this->mboxfilecnt;
        $this->mboxfn = $this->output_fn.$this->dirname.$suffix.($mbox_extension ? '.mbox' : '');
        $this->mboxhnd = fopen($this->mboxfn, 'w');
        if (!$this->mboxhnd) {
            printConsole("Error occured while opening mbox file for writing", true);
        }
    }

    /**
     * Close the current output file.
     *
     * The file is deleted again when the global $remove_empty flag is set
     * and no maildir file was found in the folder.
     */
    private function closeFile() {
        global $remove_empty;

        if (is_resource($this->mboxhnd)) {
            fclose($this->mboxhnd);
        }
        $this->mboxhnd = null;

        if ($remove_empty && $this->mdirfiles == 0) {
            unlink($this->mboxfn);
        }
    }

    /**
     * Convert every maildir file found in the sub-directories of the folder.
     *
     * Files without a usable sender header are counted as bogus and skipped.
     */
    public function parse() {
        $this->openFile();

        while (false !== ($rfile = readdir($this->dirhandle))) {
            $subdir = $this->fulldir.$rfile;
            if ($rfile == '..' || $rfile == '.' || !is_dir($subdir)) {
                continue;
            }

            $curhandle = opendir($subdir);
            if (!$curhandle) {
                printConsole("[Error] Could not open directory: ".$subdir);
                // Without a handle there is nothing to read; go on with the next sub-directory
                continue;
            }

            while (false !== ($file = readdir($curhandle))) {
                $filename = $subdir.'/'.$file;
                if (!is_file($filename) || !isMaildirFile($file)) {
                    continue;
                }

                print("\rFilecount: ".(++$this->mdirfiles)."\tTotal size written to mbox file: ".$this->totalsize);

                $mdfile = new MailDirFile($filename);
                $mdfile->readHeader();
                if (!$mdfile->foundAddress()) {
                    printConsole("\nHeader not found for : ".$file.", file skipped");
                    $this->bogusfiles++;
                    unset($mdfile);
                    continue;
                }

                // Ask for the size once and reuse it for the check and the running total
                $msgsize = $mdfile->getTotalSize();
                if (intOverflowed($this->totalsize + $msgsize)) {
                    // Start a new chunk before the byte counter leaves the integer range
                    $this->closeFile();
                    $this->mboxfilecnt++;
                    $this->chunks++;
                    $this->totalsize = 0;
                    print("\n\t\t\t\t\r");
                    $this->openFile();
                }

                ++$this->parsedfiles;
                $this->totalsize += $msgsize;

                fwrite($this->mboxhnd, $mdfile->getMboxHeader()."\n".$mdfile->getHeader());
                $mdfile->streamWriteContents($this->mboxhnd);
                fwrite($this->mboxhnd, "\n");

                // Destroy the message object now so its file handle is released
                unset($mdfile);
            }

            closedir($curhandle);
        }

        $this->closeFile();
    }

    /**
     * @return int Number of maildir files found so far (converted or not)
     */
    public function getFileCount() {
        return $this->mdirfiles;
    }

    /**
     * Print the per-folder statistics and add them to the script-wide totals.
     */
    public function printSummary() {
        printConsole("\n".'------------------------ SUMMARY ------------------------');
        printConsole("Wrote ".$this->mboxfilecnt." mbox files for this directory");
        printConsole("Converted ".$this->parsedfiles.'/'.$this->mdirfiles." files successfully");
        printConsole("Found ".$this->bogusfiles." files without a proper header and skipped them for this directory");
        printConsole('---------------------------------------------------------');

        self::$total_converted += $this->parsedfiles;
        self::$total_failed += $this->bogusfiles;
    }

    /**
     * Declared static because the total belongs to the class and the script
     * reads it as DirParser::getTotalConverted().
     *
     * @return int Messages converted by all parsers whose summary was printed
     */
    public static function getTotalConverted() {
        return self::$total_converted;
    }

    /**
     * Declared static for the same reason as getTotalConverted().
     *
     * @return int Messages skipped by all parsers whose summary was printed
     */
    public static function getTotalFailed() {
        return self::$total_failed;
    }

    /**
     * @return string Name of the folder being converted
     */
    public function getDirName() {
        return $this->dirname;
    }

    /**
     * Release the directory handle of the folder.
     */
    public function __destruct() {
        if (is_resource($this->dirhandle)) {
            closedir($this->dirhandle);
        }
        $this->dirhandle = null;
    }
}
/**
 * A single maildir message file and the mbox data derived from it.
 *
 * readHeader() consumes the header part of the file, collects the sender
 * address and the date, and builds the mbox separator line
 * "From <address> <date>". The rest of the file can afterwards be read
 * into memory with readContents() or copied with streamWriteContents().
 */
class MailDirFile {
    private $filename = '';
    private $filesize = 0;
    private $handler = null;
    private $mboxheader = 'From ';
    private $from = '';
    private $dt = '';
    private $header = '';
    private $contents='';
    private $foundaddr = false;
    private $fullmsg = '';
    // Header fields searched for a sender address, with a script-wide hit counter each
    private static $search_array = array(
        "From:"=>0, "X-From-Line:"=>0, "Reply-To:"=>0, "from:"=>0
    );
    // Address found for each of the searched header fields in this message
    private $search_array_filled = array(
        "From:"=>'', "X-From-Line:"=>'', "Reply-To:"=>'', "from:"=>''
    );

    /**
     * Open the message file and record its size.
     *
     * The script is aborted when the file cannot be opened.
     *
     * @param string $fname Path of the maildir file
     */
    public function __construct($fname) {
        $this->filename = $fname;
        // Fallback for messages without a usable Date header: the epoch, already
        // formatted like the dates produced in readHeader()
        $this->dt = gmdate("D M d H:i:s Y", 0);
        if (!$this->handler = fopen($fname, "r")) {
            printConsole("Error occured while opening file: ".$fname, true);
        }
        $this->getFileSize();
    }

    /**
     * @return array Searched header fields mapped to their hit counters
     */
    public static function getSearchArray() {
        return self::$search_array;
    }

    /**
     * Trim an address token and strip the first and last character when it
     * starts with '<' (the "<user@host>" form).
     */
    private function cleanAddress($addr) {
        $addr = trim($addr);
        if ($addr !== '' && $addr[0] == '<') {
            return substr($addr, 1, strlen($addr)-2);
        }
        return $addr;
    }

    /**
     * Read the remainder of the file into memory.
     */
    public function readContents() {
        while (!feof($this->handler)) {
            $this->contents .= fread($this->handler, 8192);
        }
    }

    /**
     * Copy the remainder of the file to another stream in 8 KB pieces.
     *
     * NOTE(review): the data is copied verbatim; body lines starting with
     * "From " are not quoted as ">From " - confirm whether the target mail
     * client needs that.
     *
     * @param resource $fp Stream opened for writing
     */
    public function streamWriteContents($fp) {
        while (!feof($this->handler)) {
            fwrite($fp, fread($this->handler, 8192));
        }
    }

    /**
     * Read the message header and build the mbox separator line.
     *
     * Lines are consumed up to and including the first blank line (LF or
     * CRLF). The sender is the first '@'-containing token of a From:, from:,
     * Reply-To: or X-From-Line: line, preferred in that order. The Date:
     * line, when it can be parsed, provides the date of the separator line.
     */
    public function readHeader() {
        while (false !== ($line = fgets($this->handler))) {
            // A blank line ends the header, whatever the line ending style
            if (rtrim($line, "\r\n") === '') {
                break;
            }

            $foundfrom = false;
            foreach (array_keys(self::$search_array) as $search) {
                if (!$this->startsWith($search, $line)) {
                    continue;
                }
                $foundfrom = true;
                self::$search_array[$search]++;
                $this->foundaddr = true;
                foreach (explode(' ', $line) as $item) {
                    if (strpos($item, '@') !== false) {
                        $this->search_array_filled[$search] = $this->cleanAddress($item);
                        break;
                    }
                }
                break;
            }

            if (!$foundfrom && $this->startsWith("Date:", $line)) {
                // Keep the fallback date when the header value cannot be parsed
                $stamp = strtotime(trim(substr($line, 5)));
                if ($stamp !== false) {
                    $this->dt = gmdate("D M d H:i:s Y", $stamp);
                }
            }

            $this->header .= $line;
        }

        // Keep the separating blank line as part of the header (nothing to add at end of file)
        if ($line !== false) {
            $this->header .= $line;
        }

        foreach (array('From:', 'from:', 'Reply-To:', 'X-From-Line:') as $field) {
            if (!empty($this->search_array_filled[$field])) {
                $this->from = $this->search_array_filled[$field];
                break;
            }
        }

        $this->mboxheader .= $this->from.' '.$this->dt;
    }

    /**
     * @return boolean true when readHeader() met one of the searched sender fields
     */
    public function foundAddress() {
        return $this->foundaddr;
    }

    public function printContents() {
        print($this->contents);
    }

    public function printHeader() {
        print($this->header);
    }

    /**
     * @return string The mbox separator line, without a line break
     */
    public function getMboxHeader() {
        return $this->mboxheader;
    }

    public function getHeader() {
        return $this->header;
    }

    public function getContents() {
        return $this->contents;
    }

    /**
     * @return string Separator line, header and in-memory contents as one mbox entry
     */
    public function getMboxMessage() {
        $this->fullmsg = $this->mboxheader."\n".$this->header.$this->contents."\n";
        return $this->fullmsg;
    }

    /**
     * Size of the file plus the separator line and the two line breaks
     * added around the message.
     *
     * The stored file size is left untouched, so repeated calls return the
     * same value.
     *
     * @return int
     */
    public function getTotalSize() {
        return $this->filesize + strlen($this->mboxheader) + 2;
    }

    /**
     * Determine the file size by seeking to the end, then rewind.
     */
    private function getFileSize() {
        fseek($this->handler, 0, SEEK_END);
        $this->filesize = ftell($this->handler);
        fseek($this->handler, 0, SEEK_SET);
    }

    /**
     * Case-sensitive check whether $line begins with $str.
     */
    private function startsWith($str, $line) {
        return $str == substr($line, 0, strlen($str));
    }

    /**
     * @return int Size of the message file in bytes
     */
    public function getSize() {
        return $this->filesize;
    }

    /**
     * Close the message file.
     */
    public function __destruct() {
        if (is_resource($this->handler)) {
            fclose($this->handler);
        }
        $this->handler = null;
    }
}
/********************** MAIN CODE **********************/
define('DIR_SEPERATOR','/');
define('OUTPUT_DIR','mboxfiles');
define('FILE_FORMAT',"/^[0-9]*\.[a-zA-Z0-9_]*\.[a-zA-Z0-9\:\,]*/");

$mbox_extension = false;
$remove_empty = false;
$program_name = $argv[0];

// Pull the recognised options out of $argv; what remains is the program name
// followed by the positional arguments
foreach ($argv as $k=>$argument) {
    switch (trim($argument)) {
        case '-e': {
            $mbox_extension = true;
            unset($argv[$k]);
            $argc--;
            break;
        }
        case '-x': {
            $remove_empty = true;
            unset($argv[$k]);
            $argc--;
            break;
        }
        case '-h': {
            // print_usage() terminates the script
            print_usage();
            break;
        }
        default: {
            // Square-bracket offset: the {0} form was removed in PHP 8
            if ($argument !== '' && $argument[0] == '-') {
                printConsole("Unknown Option: $argument");
                print_usage(true);
            }
        }
    }
}

// Re-index argv
$argv = array_values($argv);

// Exactly one positional argument (the Maildir) is expected; anything else is a
// usage error and must end with a non-zero exit status
if ($argc != 2) {
    printConsole("Exactly one Maildir directory must be specified");
    print_usage(true);
}

$maildir = $argv[1];
if ($maildir === '') {
    // An empty argument would otherwise be turned into "/" below
    printConsole("Exactly one Maildir directory must be specified");
    print_usage(true);
}

// Check if the last character of the given directory is the dir separator, if not add it.
if (substr($maildir, -1) != DIR_SEPERATOR) {
    $maildir .= '/';
}

// If the given name is a file instead of a directory kill the script
if (!is_dir($maildir)) {
    printConsole("$maildir is not a directory", true);
}

// Apply the memory limit used for the conversion
$memory_limit = '32M';
printConsole('Setting memory limit...');
$oldlimit = setMemoryLimit($memory_limit);
if ($oldlimit === false) {
    printConsole("Error setting memory limit to $memory_limit.", true);
} else {
    printConsole("New memory limit: $memory_limit\nOld memory limit: $oldlimit");
}

// Build the directory tree first, then convert it folder by folder
$rootnode = new DirectoryObj(substr($maildir,0,strpos($maildir, '/')), $maildir, OUTPUT_DIR);
parseTree($rootnode, $rootnode->getFullPath());
makeTree($rootnode);

printConsole("Finished processing all the directories.\nOutput files are located in directory: ".OUTPUT_DIR."/");
printConsole("Total files converted: ".DirParser::getTotalConverted());
printConsole("Total files failed: ".DirParser::getTotalFailed());