| 
<?php
 // This script was used to collect Canadian government organization names and their acronyms,
 // and, using the acronyms found, contact information from the Government Electronic Directory Services (GEDS).
 
 // Download the English organization index and extract, in page order, the
 // organization codes and their display names (two parallel lists).
 $OrganizationListPage = file_get_contents("http://direct.srv.gc.ca/cgi-bin/direct500/");
 if ($OrganizationListPage === false) {
     // file_get_contents() returns false on failure; treat that as an empty
     // page so the extraction below simply yields no organizations.
     $OrganizationListPage = "";
 }
 $OrganizationArray = findOrgsInURLs($OrganizationListPage);
 $OrganizationNameArray = findOrgNamesInURLs($OrganizationListPage);
 
 // Start from an empty string: appending to an undefined variable raises an
 // undefined-variable notice/warning.
 $ToWrite = "";
 foreach ($OrganizationArray as $index => $acro) {
     // Keep the part of the code that precedes the first dash (presumably the
     // English acronym). A code without a dash is used as-is instead of
     // reading a capture group that does not exist.
     if (preg_match("/([^\-]*)-/", $acro, $matches) === 1) {
         $match = $matches[1];
     } else {
         $match = $acro;
     }
     // Codes and names come from two separate regex passes, so guard against
     // a name missing at this index.
     $name = isset($OrganizationNameArray[$index]) ? $OrganizationNameArray[$index] : "";
     $ToWrite .= $match . "    " . $name . "\r\n";
 }
 
 // Write one "acronym, separator, name" line per organization.
 $file = "eng/GOC/acronyms.txt";
 $fp = fopen($file, 'w');
 if ($fp !== false) {
     // fopen() already reports its own warning on failure; only use the
     // handle when it is valid.
     fwrite($fp, $ToWrite);
     fclose($fp);
 }
 
 // Download the French organization index and extract, in page order, the
 // organization codes and their display names (two parallel lists).
 $FraOrganizationListPage = file_get_contents("http://direct.srv.gc.ca/cgi-bin/direct500/XFo%3dGC%2cc%3dCA");
 if ($FraOrganizationListPage === false) {
     // file_get_contents() returns false on failure; treat that as an empty
     // page so the extraction below simply yields no organizations.
     $FraOrganizationListPage = "";
 }
 $FraOrganizationArray = FrafindOrgsInURLs($FraOrganizationListPage);
 $FraOrganizationNameArray = FrafindOrgNamesInURLs($FraOrganizationListPage);
 
 // Start from an empty string: appending to an undefined variable raises an
 // undefined-variable notice/warning.
 $FraToWrite = "";
 foreach ($FraOrganizationArray as $Fraindex => $Fraacro) {
     // Keep the part of the code that follows the first dash (presumably the
     // French acronym). A code without a dash is used as-is instead of
     // reading a capture group that does not exist.
     if (preg_match("/-([^\-]*)/", $Fraacro, $Framatches) === 1) {
         $Framatch = $Framatches[1];
     } else {
         $Framatch = $Fraacro;
     }
     // Codes and names come from two separate regex passes, so guard against
     // a name missing at this index.
     $Franame = isset($FraOrganizationNameArray[$Fraindex]) ? $FraOrganizationNameArray[$Fraindex] : "";
     $FraToWrite .= $Framatch . "    " . $Franame . "\r\n";
 }
 
 // Write one "acronym, separator, name" line per organization.
 $Frafile = "fra/GDC/acronyms.txt";
 $Frafp = fopen($Frafile, 'w');
 if ($Frafp !== false) {
     // fopen() already reports its own warning on failure; only use the
     // handle when it is valid.
     fwrite($Frafp, $FraToWrite);
     fclose($Frafp);
 }
 
 // The script stops here with status 0 once the two acronym files are written.
 // As long as this call remains, none of the top-level statements below it run;
 // the contact-information scrape is effectively switched off.
 exit(0);
 
 // From here on, contact information is collected for every organization found above.
 // Accumulates, across all organizations, the links found on each
 // organization's search-result page.
 $URLArray = array();
 foreach ($OrganizationArray as $OrgAcro) {
     // Search the directory inside this organization; the organization code is
     // interpolated into the URL exactly as it was extracted.
     $searchUrl = "http://direct.srv.gc.ca/cgi-bin/direct500/SEou%3d$OrgAcro%2co%3dGC%2cc%3dCA?SV=web&SF=Title&ST=contains&x=31&y=20";
     $PageWithPeople = file_get_contents($searchUrl);
     $linksOnPage = findURLs($PageWithPeople);
     $URLArray = array_merge($URLArray, $linksOnPage);
 }
 
 // Field extraction table: each key is a PCRE pattern locating one field in a
 // page, and each value is the replacement template naming the capture group
 // that holds the field's text. FindStuff() walks the entries in this order, so
 // the order here is the column order of every output record.
 // NOTE(review): "é" appears literally in several character classes while the
 // patterns use only the "i" and "s" modifiers (no "u"), so matching accented
 // text depends on this file and the fetched pages sharing an encoding — confirm.
 $rxpArray = array(
 // Name: the rest of the line after <h2>, up to the following "<a"
 '/(<h2>)([\s]{0,10})([^\r\n]*)([\s]{0,10})(<a)/is' => '$3',
 // Title: text between the "Display detailed information" marker's div and the "title of person" marker
 '/(<!-- Display detailed information -->)([\s]{0,10})(<div class="*text"*>)([\s]{0,10})([^<]*)(<br>)(<br>)([\s]{0,15})(<!-- title of person -->)/is' => '$5',
 // Organization 1: text between the "title of person" and "top level OU" markers
 '/(<!-- title of person -->)([\s]{0,10})([\w ,\-\(\)é]*)([\s]{0,10})(<br>)([\s]{0,10})(<!-- top level OU -->)/is' => '${3}',
 // Organization 2: text between the "top level OU" and "immediate OU" markers
 // NOTE(review): the "*" after the fourth group repeats an already-quantified group; looks unintentional — confirm.
 '/(<!-- top level OU -->)([\s]{0,10})([\w ,\-\(\)é]*)([\s]{0,10})*(<br>)([\s]{0,10})(<!-- immediate OU -->)/is' => '$3',
 // Address: text between the first two <br> tags after the address marker
 // ("Contry" is spelled this way inside the pattern and must stay as-is to keep matching)
 '/(<!-- Address - PO Box - Mail stop - City - Province - Contry - Postal code -->)([\s]{0,10})(<br>)([^<]*)(<br>)/is' => '$4',
 // City, Province: text between the <br> tags that follow "ITEM=[]$-->"
 '/(ITEM=\[\]\$-->)([\s]{0,10})(<br>)([\w ,\-\(\)é]*)([\s]{0,10})(<br>)/is' => '$4',
 // Country: the word between two <br> tags, one line before the telephone marker
 '/(<br>)([\w]*)(<br>)([\s]{0,10})([\w \-\(\)é]*)([\s]{0,10})(<!-- Telephone - Alternate telephone - Secure telephone - Fax - Secure Fax - TDD -->)/is' => '$2',
 // Postal Code: the text between the last <br> and the telephone marker
 '/(<br>)([\s]{0,10})([\w ]*)([\s]{0,10})(<!-- Telephone - Alternate telephone - Secure telephone - Fax - Secure Fax - TDD -->)/is' => '$3',
 // Telephone 1: the <dd> value of the "Telephone:" entry right after the telephone marker
 '/(<!-- Telephone - Alternate telephone - Secure telephone - Fax - Secure Fax - TDD -->)([\s]{0,10})(<dl>)([\s]{0,10})(<dt>Telephone:<\/dt><dd>)([^<]*)(<\/dd>)/is' => '$6',
 // Telephone 2: the <dd> value immediately preceding the "Fax:" entry
 '/(<dd>)([\w\(\)\- ]*)(<\/dd>)([\s]{0,10})(<dt>Fax:<\/dt><dd>)([^<]*)(<\/dd>)([\s]{0,10})(<\/dl>)([\s]{0,10})(<!-- X400 address -->)/is' => '$2',
 // Fax: the <dd> value of the "Fax:" entry that closes the list before the X400 marker
 '/(<dt>Fax:<\/dt><dd>)([^<]*)(<\/dd>)([\s]{0,10})(<\/dl>)([\s]{0,10})(<!-- X400 address -->)/is' => '$2',
 );
 
 // Header row of the output: one title per extracted field, joined by the
 // column separator and ended with CRLF.
 // NOTE(review): the original note says the separator is meant to be a tab (for a
 // tab-separated spreadsheet); the literal is reproduced here exactly as found.
 $record = implode("    ", array(
     "Name",
     "Title",
     "Organization 1",
     "Organization 2",
     "Address",
     "City, Province",
     "Country",
     "Postal Code",
     "Telephone 1",
     "Telephone 2",
     "Fax",
 )) . "\r\n";
 
 // Fetch every collected page, turn it into one record line and append it to
 // the output, skipping pages from which no field could be extracted.
 foreach ($URLArray as $file) {
     $fileContents = file_get_contents($file);
     $WhatToAdd = FindStuff($fileContents, $rxpArray);
     // A line consisting only of separators means none of the patterns matched.
     if ($WhatToAdd == "                                            \r\n") {
         continue;
     }
     $record .= $WhatToAdd;
 }
 
 // Store the header row plus all collected lines.
 WriteFile("GEDS-record.txt", $record);
 
 /**
  * Writes $tpx to the file named $strTargetx, replacing any previous content.
  *
  * The target must be writable by the user running the script; adjust its
  * permissions beforehand if necessary.
  *
  * @param string $strTargetx Path of the file to (over)write.
  * @param string $tpx        Content to store in the file.
  */
 function WriteFile($strTargetx, $tpx) {
     $out = fopen($strTargetx, 'w');
     fwrite($out, $tpx);
     fclose($out);
 }
 
 /**
  * Builds one record line from a page by applying every pattern in order.
  *
  * For each pattern => replacement pair, the first match in $strToFindOn is
  * reduced to the field value by running the same pattern with $replacement
  * over the matched text. A pattern that does not match contributes an empty
  * field, so every record has the same number of columns.
  *
  * @param string $strToFindOn Text (page HTML) to extract the fields from.
  * @param array  $rxpArray    Map of PCRE pattern => replacement template.
  * @return string Each field value followed by the column separator, ended by "\r\n".
  */
 function FindStuff($strToFindOn, $rxpArray) {
     $newRecordToAppend = "";
     foreach ($rxpArray as $rxp => $replacement) {
         $field = "";
         // Only touch $matches[0] when the pattern really matched; reading it
         // after a failed match raised an undefined-index warning and handed
         // null to preg_replace().
         if (preg_match($rxp, $strToFindOn, $matches) === 1) {
             $field = preg_replace($rxp, $replacement, $matches[0]);
         }
         // Column separator. NOTE(review): it was described as a tab, but the
         // literal is four spaces; the empty-record check in the main loop
         // compares against this exact text, so it is left unchanged.
         $newRecordToAppend .= ($field . "    ");
     }
     $newRecordToAppend .= "\r\n";
     return($newRecordToAppend);
 }
 
 /**
  * Collects the href value of every anchor that directly follows an <li> tag.
  *
  * @param string $strToFindOn HTML to scan.
  * @return array The href values in the order they appear (empty when none match).
  */
 function FindURLs($strToFindOn) {
     $hrefPattern = "/<li><a\shref=\"([^\"]*)\"/is";
     $found = array();
     // Pattern order is the default result layout: index 1 holds all captures of group 1.
     preg_match_all($hrefPattern, $strToFindOn, $found);
     $hrefs = $found[1];
     return $hrefs;
 }
 
 /**
  * Extracts the organization codes from the English ("XEou") organization
  * links of a directory listing page.
  *
  * @param string $strToFindOn HTML of the listing page.
  * @return array The codes found between "XEou%3d" and "%2co%3dGC%2cc%3dCA", in page order.
  */
 function FindOrgsInURLs($strToFindOn) {
     $orgLinkPattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XEou%3d([^\"]*)%2co%3dGC%2cc%3dCA\"/is";
     $captures = array();
     preg_match_all($orgLinkPattern, $strToFindOn, $captures);
     // Group 1 is the organization code.
     return $captures[1];
 }
 
 /**
  * Extracts the organization codes from the French ("XFou") organization
  * links of a directory listing page.
  *
  * @param string $strToFindOn HTML of the listing page.
  * @return array The codes found between "XFou%3d" and "%2co%3dGC%2cc%3dCA", in page order.
  */
 function FraFindOrgsInURLs($strToFindOn) {
     $orgLinkPattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XFou%3d([^\"]*)%2co%3dGC%2cc%3dCA\"/is";
     $captures = array();
     preg_match_all($orgLinkPattern, $strToFindOn, $captures);
     // Group 1 is the organization code.
     return $captures[1];
 }
 
 /**
  * Extracts the link text (organization name) of the English ("XEou")
  * organization links of a directory listing page.
  *
  * @param string $strToFindOn HTML of the listing page.
  * @return array The text following each matching anchor's opening tag, up to the next "<", in page order.
  */
 function FindOrgNamesInURLs($strToFindOn) {
     $namedLinkPattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XEou%3d([^\"]*)%2co%3dGC%2cc%3dCA\">([^<]*)/is";
     $captures = array();
     preg_match_all($namedLinkPattern, $strToFindOn, $captures);
     // Group 1 is the organization code, group 2 the displayed name.
     $names = $captures[2];
     return $names;
 }
 
 /**
  * Extracts the link text (organization name) of the French ("XFou")
  * organization links of a directory listing page.
  *
  * @param string $strToFindOn HTML of the listing page.
  * @return array The text following each matching anchor's opening tag, up to the next "<", in page order.
  */
 function FraFindOrgNamesInURLs($strToFindOn) {
     $namedLinkPattern = "/<li><a\shref=\"http:\/\/direct\.srv\.gc\.ca\/cgi\-bin\/direct500\/XFou%3d([^\"]*)%2co%3dGC%2cc%3dCA\">([^<]*)/is";
     $captures = array();
     preg_match_all($namedLinkPattern, $strToFindOn, $captures);
     // Group 1 is the organization code, group 2 the displayed name.
     $names = $captures[2];
     return $names;
 }
 
 /**
  * Applies a regular-expression replacement to a string.
  *
  * @param string $strToReplaceOn Text to transform.
  * @param string $rxp            PCRE pattern to search for.
  * @param string $replacement    Replacement template (may reference capture groups).
  * @return string|null The result of preg_replace() for these arguments.
  */
 function ReplaceStuff($strToReplaceOn, $rxp, $replacement) {
     return preg_replace($rxp, $replacement, $strToReplaceOn);
 }
 
 ?>
 |