当前位置:首页 > PHP教程 > PHP常见问题

PHP curl模拟登录带验证码的网站

需求是这样的:需要登录一个带验证码的网站并获取数据,但不可能一直靠人工去记录数据,所以想通过自动采集的方式来完成。下面是试验后可用的代码,有需要的可以参考!

<?php
namespace Home\Controller;

use Think\Controller;

/**
 * Logs in to a captcha-protected remote site through cURL and keeps the
 * remote session in a cookie-jar file so later requests can reuse it.
 *
 * Class and method names are written in their conventional casing; PHP
 * resolves class and method names case-insensitively, so existing callers
 * that use the lower-case spellings keep working.
 */
class LoginController extends Controller
{
  /**
   * Browser cookie names, indexed by "cookie slot" ($cookienum).
   * Both slots currently use the same name, so they resolve to the same
   * cookie-jar file and therefore share one remote session.
   */
  protected $cookiename = array('cookie_verify', 'cookie_verify');

  /** Directory (relative to ROOT_PATH) that holds the cookie-jar files. */
  protected $cookiepath = '/cookie/';

  /** Absolute cookie-jar file path for every slot; filled by _initialize(). */
  protected $cookiepathfile = array();

  /**
   * Renders the login form template.
   */
  public function index()
  {
    $this->display();
  }

  /**
   * Builds the cookie-jar file path for every configured cookie slot.
   *
   * NOTE(review): ROOT_PATH is not defined in this file; it is assumed to
   * be defined by the application's entry script.
   */
  public function _initialize()
  {
    foreach ($this->cookiename as $name) {
      $this->cookiepathfile[] = ROOT_PATH . $this->cookiepath . $name . '_xxx.txt';
    }
  }

  /**
   * Submits the login form (user name, password, captcha) to the remote site
   * and reports the outcome.
   *
   * The remote site answers either with a page containing a `div_msg`
   * element, or with a redirect whose query string may carry a `warn` code.
   * A redirect without a `warn` code is treated as a successful login.
   */
  public function xxxLogin()
  {
    $username = I('username');
    $password = I('password');
    $verifycode = I('verify');

    // The hidden ASP.NET fields are sent along with the credentials; per the
    // original author the request may be ignored when they are missing.
    $logindata = array(
      '__viewstate' => '/wepdwukmtu0mzazotu4nmqyaquex19db250cm9sc1jlcxvpcmvqb3n0qmfja0tlev9ffgefdexvz2lux1n1ym1pdl/yae69nsy163g3yup0lxjz8oxu',
      '__viewstategenerator' => 'dc42de27',
      'txt_username' => $username,
      'txt_pwd' => $password,
      'txt_verifycode' => $verifycode,
      'smoney' => 'abc',
      'login_submit.x' => '52',
      'login_submit.y' => '19',
    );

    $getback = $this->_cookieRequest('http://xxx.com/nologin.aspx', $logindata);

    // A failed transfer must not fall through to the "success" branch below.
    if ($getback === false) {
      $this->error('请求登录接口失败,请稍后重试', '', 5);
      return;
    }

    // "[^>]*?" keeps the match inside the opening tag; a character class such
    // as "[^<div]" would only exclude the single characters <, d, i and v.
    if (preg_match('/<div[^>]*?\bid\s*=\s*[\'"]div_msg[\'"][^>]*>(.*?)<\/div>/s', $getback, $match)) {
      // Debug output of the message block returned by the remote site.
      echo "matched\r\n";
      print_r($match);
      return;
    }

    // Debug output of the redirect target (or page body) that came back.
    echo $getback, '<br />';

    $paramsfull = parse_url($getback);
    if (!is_array($paramsfull)) {
      $paramsfull = array();
    }
    $query = isset($paramsfull['query']) ? $paramsfull['query'] : '';
    parse_str($query, $paramsfull['parsedquery']);

    if (!empty($paramsfull['parsedquery']['warn'])) {
      $this->error($this->warnMessage($paramsfull['parsedquery']['warn']), '', 5);
    } else {
      $_SESSION['user_id'] = '123456';      // mark the local user as logged in
      $this->success('登录p网站成功', U('index/index'), 5);
    }
  }

  /**
   * Maps a `warn` code from the remote redirect URL to a user-facing message.
   *
   * @param mixed $code value of the `warn` query parameter
   * @return string message for the code, or the generic "please log in" text
   */
  private function warnMessage($code)
  {
    $messages = array(
      '2' => '您输入的验证码错误,请重试',
      '3' => '该帐号不存在,还没帐号?',
      '5' => '账户已注销',
      '6' => '密码错误,如果连续错误3次半小时内不能登录!',
      '20' => '今日密码错误3次及以上,请于半小时后再来登录!',
      '21' => '今日您所在ip的所有帐号密码错误9次以上,请于半小时后再来登录!',
      '22' => '登录失败,您所在ip今日登录的帐号过多!',
      '23' => '登录失败,验证码失效!',
      '32' => '该帐号已经绑定其他xx帐号!',
      '33' => '一台电脑一天只能注册一个帐号!',
    );

    if (is_scalar($code) && isset($messages[(string) $code])) {
      return $messages[(string) $code];
    }
    return "您好,欢迎来p,请先登录。";
  }

  /**
   * Fetches the captcha image from the remote site and echoes the raw bytes.
   */
  public function getVerifyCode()
  {
    // The random id only serves as a cache buster for the image URL.
    $img = $this->_cookieRequest('http://xxx.com/verifycode_login.aspx?id=' . rand(10000, 999999), null, true, 1);
    echo $img;
  }

  /**
   * Expires the browser cookies that mark an existing remote session.
   *
   * The cookie-jar files themselves are left on disk; they are overwritten
   * the next time _cookieRequest() starts a new session.
   */
  public function clearCookie()
  {
    foreach ($this->cookiename as $name) {
      setcookie($name, '', time() - 3600);
    }
    $this->success('清除cookie成功!');
  }

  /**
   * Performs a cURL request that shares cookies through a cookie-jar file.
   *
   * @param string     $url       address to request
   * @param array|null $data      form fields; when non-empty the request is a POST
   * @param bool       $redirect  false: if the request ended on a different URL,
   *                              return that final URL instead of the body;
   *                              true: always return the body
   * @param int        $cookienum index into $cookiename / $cookiepathfile
   * @return mixed final URL, response body, or false when the transfer failed
   */
  public function _cookieRequest($url, $data = null, $redirect = false, $cookienum = 0)
  {
    $jarfile = $this->cookiepathfile[$cookienum];
    $cookiekey = $this->cookiename[$cookienum];

    $params = array(
      CURLOPT_URL => $url,
      CURLOPT_HEADER => false,            // do not include response headers in the body
      CURLOPT_RETURNTRANSFER => true,     // return the body instead of printing it
      CURLOPT_FOLLOWLOCATION => true,     // follow redirects
      CURLOPT_USERAGENT => 'mozilla/5.0 (windows nt 5.1; rv:9.0.1) gecko/20100101 firefox/9.0.1',
      // Always write cookies back, so cookies issued during this request
      // (e.g. at login) are available to the next one.
      CURLOPT_COOKIEJAR => $jarfile,
    );

    if ($data) {
      $params[CURLOPT_POST] = true;
      $params[CURLOPT_POSTFIELDS] = http_build_query($data);
    }

    if (!empty($_COOKIE[$cookiekey]) && is_file($jarfile)) {
      // The browser still carries the marker cookie: continue the stored session.
      $params[CURLOPT_COOKIEFILE] = $jarfile;
    } else {
      // New session: remember for 120 seconds that a jar exists.
      // NOTE(review): the cookie value exposes the server-side file path to
      // the browser; only its presence is checked here.
      setcookie($cookiekey, $jarfile, time() + 120);
    }

    $ch = curl_init();
    curl_setopt_array($ch, $params);
    $content = curl_exec($ch);
    $headers = curl_getinfo($ch);
    curl_close($ch);

    if ($content === false) {
      return false;
    }
    if (!$redirect && isset($headers['url']) && $url != $headers['url']) {
      return $headers['url'];
    }
    return $content;
  }
}

登录以后,就可以通过带 cookie 的请求访问其他页面了!

ps:php curl 登录淘宝

提交后提示未填写验证码,登录不上去。

 填写验证码提交:

<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
  <head>
   <meta http-equiv="content-type" content="text/html; charset=gb2312" />
   <title></title>
  </head>
  <body>
  <!-- b.php is loaded in the frame below; the form underneath posts the login fields to tb.php. -->
  <iframe id="img" src="b.php" width="950" height="300" scrolling="no" frameborder="0"></iframe>
  <form action="tb.php" method="post">
  <!-- Ampersands are written as &amp; so the browser does not decode "&not" into the "¬" character.
       The value is kept on one line: a line break inside it would be submitted as part of the data. -->
  <textarea name="vv" cols="50" rows="10">umto=&amp;action=authenticator&amp;event_submit_do_login=anything&amp;from=tb&amp;fc=default&amp;style=default&amp;css_style=&amp;tid=xor_1_000000000000000000000000000000_63504554470a7c717f750278&amp;support=000001&amp;ctrlversion=1,0,0,7&amp;logintype=3&amp;minititle=&amp;minipara=&amp;pstrong=&amp;llnick=&amp;sign=&amp;need_sign=&amp;isignore=&amp;full_redirect=&amp;popid=&amp;callback=&amp;guf=&amp;not_duplite_str=&amp;need_user_id=&amp;poy=xor_1_000000000000000000000000000000_625a424a45137c6f7a7f0b786d08&amp;gvfdcname=&amp;gvfdcre=&amp;from_encoding=&amp;tpl_redirect_url=http:www.taobao.com&amp;tpl_username=xxx&amp;tpl_password=xxxx&amp;need_check_code=&amp;tpl_checkcode=</textarea>
  <input type="submit" />
  </form>

  </body>
</html>
<?php
// Login submit script (the form on the page above posts here as tb.php):
// sends the pasted form fields to the Taobao login endpoint, reusing the
// cookie jar whose path is stored in the session, and prints the response.
session_start();

// Without a cookie jar in the session there is no remote session to continue.
if (empty($_SESSION['cookie_jar'])) {
    exit();
}
$cookie_jar = $_SESSION['cookie_jar'];

// The body comes from a textarea, so drop any line breaks that slipped in.
$post_fields = isset($_POST['vv']) ? str_replace(array("\r", "\n"), '', $_POST['vv']) : '';

$user_agent = "mozilla/4.0 (compatible; msie 8.0; windows nt 6.1; trident/4.0; qqwubi 133; embedded web browser from: http://bsalsa.com/; slcc2; .net clr 2.0.50727; .net clr 3.5.30729; .net clr 3.0.30729; tablet pc 2.0; .net4.0c; .net4.0e; infopath.3; media center pc 6.0)";

$ch = curl_init('https://login.taobao.com/member/login.jhtml');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
// NOTE(review): peer verification is disabled while credentials are sent;
// enable it once a CA bundle is available on the server.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
// The value 1 is no longer supported by cURL; 2 is the valid "verify host" setting.
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
// Send the cookies collected so far (the captcha belongs to that session)
// and store whatever the login response sets.
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
$data = curl_exec($ch);
curl_close($ch);

echo $data;
// NOTE(review): the script stops here, so the follow-up request below never
// runs; remove this exit to fetch the home page with the logged-in cookies.
exit;

$ch = curl_init('http://www.taobao.com');
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 0);   // print the response directly
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, true);
curl_exec($ch);
curl_close($ch);
?>

提取验证码

<?php
// Captcha fetch script: creates a cookie jar for this session, reads the
// "umto" token from the login page, posts the login fields once, and prints
// both the POST body to reuse and the captcha image found in the response.
session_start();

$cookie_jar = tempnam("./temp/", "cookie");
if ($cookie_jar === false) {
    exit('Unable to create cookie jar file');
}
$_SESSION['cookie_jar'] = $cookie_jar;

// Kept on a single line: a line break inside the string would be sent as data.
$post_fields = 'action=authenticator&event_submit_do_login=anything&from=tb&fc=default&style=default&css_style=&tid=xor_1_000000000000000000000000000000_63504554470a7c717f750278&support=000001&ctrlversion=1,0,0,7&logintype=3&minititle=&minipara=&pstrong=&llnick=&sign=&need_sign=&isignore=&full_redirect=&popid=&callback=&guf=&not_duplite_str=&need_user_id=&poy=xor_1_000000000000000000000000000000_625a424a45137c6f7a7f0b786d08&gvfdcname=&gvfdcre=&from_encoding=&tpl_redirect_url=http:www.taobao.com&tpl_username=xxx&tpl_password=xxx';

$user_agent = "mozilla/4.0 (compatible; msie 8.0; windows nt 6.1; trident/4.0; qqwubi 133; embedded web browser from: http://bsalsa.com/; slcc2; .net clr 2.0.50727; .net clr 3.5.30729; .net clr 3.0.30729; tablet pc 2.0; .net4.0c; .net4.0e; infopath.3; media center pc 6.0)";

// Step 1: load the login page to read the hidden "umto" field.
$ch = curl_init('https://login.taobao.com/member/login.jhtml');
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
// NOTE(review): peer verification is disabled; enable it once a CA bundle is available.
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
$data = curl_exec($ch);
curl_close($ch);
if ($data === false) {
    exit('Unable to load the login page');
}

// Case-insensitive because the exact casing of the remote markup is not
// known here — TODO confirm against the live page.
$umto = '';
if (preg_match('/id="um_to" name="umto" value="(.*?)"\/>/i', $data, $arr)) {
    $umto = $arr[1];
}
$post_fields = "umto=" . $umto . "&" . $post_fields . "&tpl_checkcode=";

// Escape for HTML so sequences such as "&not" are shown literally instead of
// being decoded by the browser; entities already present are left untouched.
echo "<textarea cols=50 rows=10>" . htmlspecialchars($post_fields, ENT_QUOTES, 'UTF-8', false) . "</textarea><br/>";

// Step 2: post the fields without a captcha; the response is searched below
// for the captcha image, and the session cookies go into the jar.
$ch = curl_init('https://login.taobao.com/member/login.jhtml');
curl_setopt($ch, CURLOPT_HEADER, 0);
curl_setopt($ch, CURLOPT_USERAGENT, $user_agent);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
curl_setopt($ch, CURLOPT_POST, 1);
curl_setopt($ch, CURLOPT_POSTFIELDS, $post_fields);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, 2);
curl_setopt($ch, CURLOPT_COOKIEJAR, $cookie_jar);
curl_setopt($ch, CURLOPT_COOKIEFILE, $cookie_jar);
$data = curl_exec($ch);
curl_close($ch);
if ($data === false) {
    exit('Unable to submit the login form');
}

if (preg_match('/img id="j_standardcode_m" src="(.*?)" data-src=/i', $data, $arr1)) {
    // The captured value is already HTML-encoded markup, hence double_encode = false.
    echo '<img src="' . htmlspecialchars($arr1[1], ENT_QUOTES, 'UTF-8', false) . '" />';
} else {
    echo 'Captcha image not found in the response';
}
exit;
?>

【说明】本文章由站长整理发布,文章内容不代表本站观点,如文中有侵权行为,请与本站客服联系(QQ:)!