How Can I Run a Local PHP Simple HTML DOM Parser with a Proxy?

I have a PHP Simple HTML DOM Parser locally in MAMP that pulls information and works well with the Japan version of a website, since I'm located in Japan. However, I would like to pull information from the UK version of the site. What is the simplest way to do this?

I tried the following from the documentation and it didn't work.

$context = array('http' => array('proxy' => '212.82.126.32:80','request_fulluri' => true,),);
$stream = stream_context_create($context);

$html = file_get_html('http://www.supremenewyork.com/shop/new', false, $stream);

I also tried the curl version with modifications as the site has safe mode enabled. That didn't work as well.

function curl_exec_follow(/*resource*/ $ch, /*int*/ &$maxredirect = null) { 
    $mr = $maxredirect === null ? 5 : intval($maxredirect); 
    if (ini_get('open_basedir') == '' && ini_get('safe_mode' == 'Off')) { 
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, $mr > 0); 
        curl_setopt($ch, CURLOPT_MAXREDIRS, $mr); 
    } else { 
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, false); 
        if ($mr > 0) { 
            $newurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL); 

            $rch = curl_copy_handle($ch); 
            curl_setopt($rch, CURLOPT_HEADER, true); 
            curl_setopt($rch, CURLOPT_NOBODY, true); 
            curl_setopt($rch, CURLOPT_FORBID_REUSE, false); 
            curl_setopt($rch, CURLOPT_RETURNTRANSFER, true); 
            do { 
                curl_setopt($rch, CURLOPT_URL, $newurl); 
                $header = curl_exec($rch); 
                if (curl_errno($rch)) { 
                    $code = 0; 
                } else { 
                    $code = curl_getinfo($rch, CURLINFO_HTTP_CODE); 
                    if ($code == 301 || $code == 302) { 
                        preg_match('/Location:(.*?)\n/', $header, $matches); 
                        $newurl = trim(array_pop($matches)); 
                    } else { 
                        $code = 0; 
                    } 
                } 
            } while ($code && --$mr); 
            curl_close($rch); 
            if (!$mr) { 
                if ($maxredirect === null) { 
                    trigger_error('Too many redirects. When following redirects, libcurl hit the maximum amount.', E_USER_WARNING); 
                } else { 
                    $maxredirect = 0; 
                } 
                return false; 
            } 
            curl_setopt($ch, CURLOPT_URL, $newurl); 
        } 
    } 
    return curl_exec($ch); 
} 



$url = 'http://www.supremenewyork.com/shop/new';
$proxy = '212.82.126.32:80';

$options = array( 
    CURLOPT_PROXY          => $proxy,
    CURLOPT_HTTPPROXYTUNNEL => 0,
    CURLOPT_REFERER        => "http://www.google.com",
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_USERAGENT      => "Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1", 
    CURLOPT_CONNECTTIMEOUT => 20,
    CURLOPT_TIMEOUT        => 20,
    CURLOPT_MAXREDIRS      => 10,
    CURLOPT_HEADER         => true,

); 

$ch = curl_init( $url ); 
//curl_setopt_array( $ch, $options ); 
$content = curl_exec_follow( $ch ); 

$html = new simple_html_dom();
$html->load($content,true,false);

I tried uploading to US and UK servers as well, but that didn't work and it just pulls US data. Some help please?

// define cookie file path here define('CRAWLER_COOKIE_FILENAME', 'cookie.txt'); function curl_exec_follow($url) { $proxy = '212.82.126.32:80'; $agent = 'Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.1.1) Gecko/20061204 Firefox/2.0.0.1'; // Some websites check referrer $host = parse_url($url, PHP_URL_HOST); $scheme = parse_url($url, PHP_URL_SCHEME); $referrer = $scheme . '://' . $host; $ch = curl_init(); $curl_defaults = array( CURLOPT_HEADER => 0, CURLOPT_FOLLOWLOCATION => 1, CURLOPT_RETURNTRANSFER => 1, ); curl_setopt_array($ch, $curl_defaults); curl_setopt($ch, CURLOPT_URL, $url); curl_setopt($ch, CURLOPT_PROXY, $proxy); curl_setopt($ch, CURLOPT_USERAGENT, $agent); curl_setopt($ch, CURLOPT_REFERER, $referrer); if ( !file_exists(CRAWLER_COOKIE_FILENAME) || !is_writable(CRAWLER_COOKIE_FILENAME) ) { echo 'Cookie file is missing or not writable.'; exit; } curl_setopt($ch, CURLOPT_COOKIESESSION, 0); curl_setopt($ch, CURLOPT_COOKIEFILE, CRAWLER_COOKIE_FILENAME); curl_setopt($ch, CURLOPT_COOKIEJAR, CRAWLER_COOKIE_FILENAME); curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1); curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5); // allow to crawl https webpages curl_setopt($ch,CURLOPT_SSL_VERIFYHOST,0); curl_setopt($ch,CURLOPT_SSL_VERIFYPEER,0); // the download speed must be at least 1 byte per second curl_setopt($ch,CURLOPT_LOW_SPEED_LIMIT, 1); // if the download speed is below 1 byte per second for more than 30 seconds curl will give up curl_setopt($ch,CURLOPT_LOW_SPEED_TIME, 30); $content = curl_exec($ch); if ($ret === FALSE) { echo curl_error($ch); } $code = curl_getinfo($ch, CURLINFO_HTTP_CODE); if ( $code != '200' ) echo 'http error code: ' . $code; curl_close($ch); return $content; }

Recommended topics

Hot tags