PHP cURL helpers with 301/302 redirect support

<?php
// Guard: terminate right away, so the sample request below never runs when
// this file is executed.
exit;
// Usage example (unreachable because of the exit above): request a sample URL
// through curl_get() and print whatever it returns.
$r = curl_get('https://instagram.com/p/BE68tc_BQeV/');
echo $r;
 
 
/**
 * Fetch a URL with an HTTP GET request, following 301/302 redirects manually.
 *
 * CURLOPT_FOLLOWLOCATION is intentionally left off; instead, every 301/302
 * answer triggers a new call of this function against the redirect target that
 * libcurl itself resolves from the Location header (CURLINFO_REDIRECT_URL).
 * That covers relative targets, lowercase (HTTP/2) header names, ports and
 * query strings without any hand-written header parsing.
 *
 * Failures are appended to /home/xxx/log_curl_get.php.
 *
 * @param string $url        Absolute URL to request.
 * @param bool   $headerShow Whether cURL puts the response headers into its raw
 *                           output (CURLOPT_HEADER). Headers are stripped again
 *                           before returning, so the result is the body either way.
 * @param int    $loop       Number of requests already made in this redirect
 *                           chain. Callers normally leave it at 0.
 *
 * @return string|false Response body when the final status is 200; false on any
 *                      other status, on a transport error, on a redirect whose
 *                      target cannot be determined, or once a chain would need
 *                      more than three requests.
 */
function curl_get($url, $headerShow = true, $loop = 0)
{
    $loop++;
    if ($loop > 3) {
        // Redirect chain is too long (or circular): give up.
        return false;
    }

    $logFile = '/home/xxx/log_curl_get.php';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // NOTE(review): host and peer verification are disabled, which leaves the
    // request open to man-in-the-middle attacks. Kept as in the original for
    // compatibility; enable both once a CA bundle is known to be available.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, $headerShow);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,"
            . "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection:keep-alive",
        "Keep-Alive: 300",
        "Accept-Language: en-US,en;q=0.8,ja;q=0.6,zh-CN;q=0.4,zh;q=0.2,zh-TW;q=0.2",
        "Pragma:no-cache",
    );
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);

    $result = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);

    if ($httpCode === 301 || $httpCode === 302) {
        // With FOLLOWLOCATION off, libcurl reports the absolute URL it would
        // have followed; it is empty when no usable Location header was sent.
        $redirectUrl = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
        // Release the handle before recursing so redirects do not leak handles.
        curl_close($ch);

        if (!is_string($redirectUrl) || $redirectUrl === '') {
            $time = date('Y-m-d H:i:s');
            $error = "couldn't process the url to redirect to";
            $log = "{time:$time}{code:$httpCode}{error:$error}\r\n";
            file_put_contents($logFile, $log, FILE_APPEND);

            return false;
        }

        return curl_get($redirectUrl, $headerShow, $loop);
    }

    if ($httpCode === 200 && is_string($result)) {
        if ($headerShow) {
            // CURLINFO_HEADER_SIZE is the total size of all received header
            // blocks, so cutting there leaves exactly the body.
            $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
            $result = (string) substr($result, $headerSize);
        }
    } else {
        // Any other status, or a transport failure (status 0), is an error.
        $time = date('Y-m-d H:i:s');
        $error = curl_error($ch);
        $log = "{time:$time}{code:$httpCode}{error:$error}\r\n";
        file_put_contents($logFile, $log, FILE_APPEND);
        $result = false;
    }

    curl_close($ch);

    return $result;
}
 
 
 
 
 
 
 
/**
 * Send an HTTP POST request, following 301/302 redirects manually.
 *
 * CURLOPT_FOLLOWLOCATION is intentionally left off; instead, every 301/302
 * answer triggers a new call of this function, re-sending the same POST data
 * to the redirect target that libcurl itself resolves from the Location header
 * (CURLINFO_REDIRECT_URL). That covers relative targets, lowercase (HTTP/2)
 * header names, ports and query strings without hand-written header parsing.
 *
 * Failures are appended to /home/xxx/log_curl_post.php.
 *
 * @param string       $url        Absolute URL to request.
 * @param array|string $data       POST payload; handed to CURLOPT_POSTFIELDS
 *                                 only when it is not empty.
 * @param bool         $headerShow Whether cURL puts the response headers into
 *                                 its raw output (CURLOPT_HEADER). Headers are
 *                                 stripped again before returning, so the
 *                                 result is the body either way.
 * @param int          $loop       Number of requests already made in this
 *                                 redirect chain. Callers normally leave it at 0.
 *
 * @return string|false Response body when the final status is 200; false on any
 *                      other status, on a transport error, on a redirect whose
 *                      target cannot be determined, or once a chain would need
 *                      more than three requests.
 */
function curl_post($url, $data = array(), $headerShow = true, $loop = 0)
{
    $loop++;
    if ($loop > 3) {
        // Redirect chain is too long (or circular): give up.
        return false;
    }

    $logFile = '/home/xxx/log_curl_post.php';

    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, $url);
    // NOTE(review): host and peer verification are disabled, which leaves the
    // request open to man-in-the-middle attacks. Kept as in the original for
    // compatibility; enable both once a CA bundle is known to be available.
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_HEADER, $headerShow);
    curl_setopt($ch, CURLOPT_POST, true);
    if (!empty($data)) {
        curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
    }
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

    $header = array(
        "Accept: text/xml,application/xml,application/xhtml+xml,"
            . "text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
        "Cache-Control: max-age=0",
        "Connection:keep-alive",
        "Keep-Alive: 300",
        "Accept-Language: en-US,en;q=0.8,ja;q=0.6,zh-CN;q=0.4,zh;q=0.2,zh-TW;q=0.2",
        "Pragma:no-cache",
    );
    curl_setopt($ch, CURLOPT_HTTPHEADER, $header);
    curl_setopt($ch, CURLOPT_TIMEOUT, 60);

    $result = curl_exec($ch);
    $httpCode = (int) curl_getinfo($ch, CURLINFO_HTTP_CODE);

    if ($httpCode === 301 || $httpCode === 302) {
        // With FOLLOWLOCATION off, libcurl reports the absolute URL it would
        // have followed; it is empty when no usable Location header was sent.
        $redirectUrl = curl_getinfo($ch, CURLINFO_REDIRECT_URL);
        // Release the handle before recursing so redirects do not leak handles.
        curl_close($ch);

        if (!is_string($redirectUrl) || $redirectUrl === '') {
            $time = date('Y-m-d H:i:s');
            $error = "couldn't process the url to redirect to";
            $log = "{time:$time}{code:$httpCode}{error:$error}\r\n";
            file_put_contents($logFile, $log, FILE_APPEND);

            return false;
        }

        // Repeat the POST, with the same payload, against the new location.
        return curl_post($redirectUrl, $data, $headerShow, $loop);
    }

    if ($httpCode === 200 && is_string($result)) {
        if ($headerShow) {
            // CURLINFO_HEADER_SIZE is the total size of all received header
            // blocks, so cutting there leaves exactly the body.
            $headerSize = (int) curl_getinfo($ch, CURLINFO_HEADER_SIZE);
            $result = (string) substr($result, $headerSize);
        }
    } else {
        // Any other status, or a transport failure (status 0), is an error.
        $time = date('Y-m-d H:i:s');
        $error = curl_error($ch);
        $log = "{time:$time}{code:$httpCode}{error:$error}\r\n";
        file_put_contents($logFile, $log, FILE_APPEND);
        $result = false;
    }

    curl_close($ch);

    return $result;
}
 
/**
 * Strip the leading HTTP header block(s) from a raw response string.
 *
 * A header block is a status line ("HTTP/<version> <3-digit code> ..."),
 * any number of non-empty header lines, and the blank line that ends them.
 * Several blocks can be stacked in front of the body (for example an interim
 * "100 Continue" or a proxy's CONNECT answer before the real response); all of
 * them are removed. Detection relies on the block structure only, not on
 * particular header names, so lowercase (HTTP/2) headers and responses without
 * Content-Length are handled as well.
 *
 * Input that does not start with a complete header block is returned unchanged.
 *
 * @param string $result Raw response, optionally prefixed by header block(s).
 *
 * @return string The response with every leading header block removed.
 */
function removeHeader($result)
{
    // Anchored at the very start of the string: status line, header lines,
    // then the empty line. Bare LF line endings are tolerated next to CRLF.
    $headerBlock = '/^HTTP\/\d+(?:\.\d+)?[ \t]+\d{3}[^\r\n]*\r?\n(?:[^\r\n]+\r?\n)*\r?\n/';

    while (preg_match($headerBlock, $result, $matchs) === 1) {
        // Cast guards against substr() returning false on old PHP versions
        // when the body is empty.
        $result = (string) substr($result, strlen($matchs[0]));
    }

    return $result;
}

Related posts:

Leave a Reply

Your email address will not be published.