需求是这样的:需要登录一个带验证码的网站并获取数据,但不可能一直靠人工去记录数据,因此想通过自动采集的方式来完成。下面是试验得出的代码,有需要的可以参考!
<?php
namespace Home\Controller;

use Think\Controller;

/**
 * Logs in to a captcha-protected remote site through cURL and keeps the
 * remote session in a cookie-jar file so later requests can reuse it.
 *
 * Flow: the browser first loads getverifycode() (which fetches the captcha
 * image and stores the remote session cookies in the jar), the user types the
 * captcha, and xxxlogin() then posts the credentials using the same jar.
 */
class LoginController extends Controller
{
    /** Browser cookie names, indexed by the $cookienum argument of _cookierequest(). */
    protected $cookiename = array('cookie_verify', 'cookie_verify');

    /** Directory (relative to the project root) that holds the cookie-jar files. */
    protected $cookiepath = '/cookie/';

    /** Cookie-jar file paths, built in _initialize(); same indexes as $cookiename. */
    protected $cookiepathfile = array();

    /**
     * Renders the default template for this action.
     */
    public function index()
    {
        $this->display();
    }

    /**
     * Framework initialisation hook: builds one cookie-jar path per cookie name.
     */
    public function _initialize()
    {
        foreach ($this->cookiename as $name) {
            // NOTE(review): `root_path` is kept exactly as written in the source;
            // constants are case-sensitive, so confirm the project defines it with
            // this spelling (it may be ROOT_PATH).
            $this->cookiepathfile[] = root_path . $this->cookiepath . $name . '_xxx.txt';
        }
    }

    /**
     * Logs in to the remote site with the submitted username, password and captcha.
     *
     * The remote site reports failures by redirecting to a URL carrying a
     * `warn` query parameter; that code is mapped to a message and shown via
     * error(). When no warning is present the login is treated as successful.
     */
    public function xxxlogin()
    {
        $username   = I('username');
        $password   = I('password');
        $verifycode = I('verify');

        // Every field of the remote form is sent; an incomplete form may not get a response.
        // NOTE(review): the view-state values are page-specific tokens — presumably
        // they must be copied from the live login page; verify before use.
        $logindata = array(
            '__viewstate'          => '/wepdwukmtu0mzazotu4nmqyaquex19db250cm9sc1jlcxvpcmvqb3n0qmfja0tlev9ffgefdexvz2lux1n1ym1pdl/yae69nsy163g3yup0lxjz8oxu',
            '__viewstategenerator' => 'dc42de27',
            'txt_username'         => $username,
            'txt_pwd'              => $password,
            'txt_verifycode'       => $verifycode,
            'smoney'               => 'abc',
            'login_submit.x'       => '52',
            'login_submit.y'       => '19',
        );

        $getback = $this->_cookierequest('http://xxx.com/nologin.aspx', $logindata);
        if ($getback === false) {
            $this->error('请求登录接口失败,请稍后重试', '', 5);
            return;
        }

        // The response body contains an inline message box (<div id="div_msg">).
        if (preg_match('#<div[^>]*?id\s*=\s*[\'"]div_msg[\'"].*?>(.*?)</div>#s', $getback, $match)) {
            echo "matched\r\n";
            print_r($match);
            return;
        }

        echo $getback, '<br />';

        // $getback is normally the URL we were redirected to; read its query string.
        $query = array();
        $parts = parse_url($getback);
        if (is_array($parts) && isset($parts['query'])) {
            parse_str($parts['query'], $query);
        }

        if (!empty($query['warn'])) {
            $messages = array(
                2  => '您输入的验证码错误,请重试',
                3  => '该帐号不存在,还没帐号?',
                5  => '账户已注销',
                6  => '密码错误,如果连续错误3次半小时内不能登录!',
                20 => '今日密码错误3次及以上,请于半小时后再来登录!',
                21 => '今日您所在ip的所有帐号密码错误9次以上,请于半小时后再来登录!',
                22 => '登录失败,您所在ip今日登录的帐号过多!',
                23 => '登录失败,验证码失效!',
                32 => '该帐号已经绑定其他xx帐号!',
                33 => '一台电脑一天只能注册一个帐号!',
            );
            $warn = $query['warn'];
            $code = (is_scalar($warn) && is_numeric($warn)) ? (int) $warn : -1;
            $msg  = isset($messages[$code]) ? $messages[$code] : "您好,欢迎来p,请先登录。";
            $this->error($msg, '', 5);
        } else {
            $_SESSION['user_id'] = '123456'; // mark the user as logged in
            $this->success('登录p网站成功', U('index/index'), 5);
        }
    }

    /**
     * Fetches the captcha image from the remote site and outputs its raw bytes.
     */
    public function getverifycode()
    {
        // The random id only makes the URL unique on every call.
        $img = $this->_cookierequest('http://xxx.com/verifycode_login.aspx?id=' . rand(10000, 999999), null, true, 1);
        echo $img;
    }

    /**
     * Expires the browser cookies that mark a cookie jar as valid.
     */
    public function clearcookie()
    {
        foreach ($this->cookiename as $name) {
            setcookie($name, '', time() - 3600);
        }
        $this->success('清除cookie成功!');
    }

    /**
     * Performs a cURL request that shares cookies through a cookie-jar file.
     *
     * @param string     $url       Address to request.
     * @param array|null $data      Form fields; when non-empty the request is a POST.
     * @param bool       $redirect  false: if cURL ended up on a URL different from
     *                              $url (i.e. it was redirected), return that final
     *                              URL instead of the body. true: always return the body.
     * @param int        $cookienum Index into $cookiename / $cookiepathfile.
     * @return string|false Final URL or response body; false when the request failed.
     */
    public function _cookierequest($url, $data = null, $redirect = false, $cookienum = 0)
    {
        $cookiekey = $this->cookiename[$cookienum];
        $jarfile   = $this->cookiepathfile[$cookienum];

        $params = array(
            CURLOPT_URL            => $url,   // request address
            CURLOPT_HEADER         => false,  // do not include response headers in the result
            CURLOPT_RETURNTRANSFER => true,   // return the result instead of printing it
            CURLOPT_FOLLOWLOCATION => true,   // follow redirects
            CURLOPT_USERAGENT      => 'mozilla/5.0 (windows nt 5.1; rv:9.0.1) gecko/20100101 firefox/9.0.1',
            // Always write cookies back, otherwise the cookies set by the login
            // response are lost and later requests are not authenticated.
            CURLOPT_COOKIEJAR      => $jarfile,
        );

        if ($data) {
            $params[CURLOPT_POST]       = true;
            $params[CURLOPT_POSTFIELDS] = http_build_query($data);
        }

        if (!empty($_COOKIE[$cookiekey]) && is_file($jarfile)) {
            // The jar is still valid for this browser: send its cookies.
            $params[CURLOPT_COOKIEFILE] = $jarfile;
        } else {
            // Start a fresh jar and remember for 120 seconds that it exists.
            // NOTE(review): the cookie value exposes a server file path to the
            // client; only its non-emptiness is checked in this class.
            setcookie($cookiekey, $jarfile, time() + 120);
        }

        $ch = curl_init();
        curl_setopt_array($ch, $params);
        $content      = curl_exec($ch);
        $effectiveurl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
        curl_close($ch);

        if ($content === false) {
            return false;
        }
        if (!$redirect && $url !== $effectiveurl) {
            return $effectiveurl;
        }
        return $content;
    }
}
登录成功以后,就可以带着已保存的 cookie 去访问该网站的其他页面了!
PS:使用 PHP cURL 登录淘宝
直接提交登录表单后,页面提示需要填写验证码,无法登录。
填写验证码并提交的表单页面(通过 iframe 加载 b.php 显示验证码,表单提交到 tb.php),其后是 tb.php 的代码:
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="content-type" content="text/html; charset=gb2312" />
<title></title>
</head>
<body>
<!-- b.php prints the prepared POST body and the captcha image. -->
<iframe id='img' src="b.php" width="950" height="300" scrolling="no" frameborder="0"></iframe>
<!--
  Paste the POST body shown by b.php into the textarea, append the captcha
  text after "tpl_checkcode=", then submit it to tb.php.
  Ampersands are written as &amp; so the browser does not decode sequences
  such as "&not" into a character entity; the submitted value still contains
  plain "&".
-->
<form action="tb.php" method="post">
<textarea name="vv" cols="50" rows="10">umto=&amp;action=authenticator&amp;event_submit_do_login=anything&amp;from=tb&amp;fc=default&amp;style=default&amp;css_style=&amp;tid=xor_1_000000000000000000000000000000_635045544 70a7c717f750278&amp;support=000001&amp;ctrlversion=1,0,0,7&amp;logintype=3&amp;minititle=&amp;minipara=&amp;pstrong=&amp;llnick=&amp;sign=&amp;need_sign=&amp;isignore=&amp;full_redirect=&amp;popid=&amp;callback=&amp;guf=&amp;not_duplite_str=&amp;need_user_id=&amp;poy=xor_1_000000000000000000000000000000_625a424a45137c6f7a7f0b786d08&amp;gvfdcname=&amp;gvfdcre=&amp;from_encoding=&amp;tpl_redirect_url=http:www.taobao.com&amp;tpl_username=xxx&amp;tpl_password=xxxx&amp;need_check_code=&amp;&amp;tpl_checkcode=</textarea>
<input type="submit" />
</form>
</body>
</html>
<?php
// Submits the login form (including the captcha typed by the user) to Taobao,
// reusing the cookie jar whose path an earlier request stored in the session.
session_start();

// Without a cookie jar there is no captcha session to log in with.
if (empty($_SESSION['cookie_jar'])) {
    exit();
}
$cookie_jar = $_SESSION['cookie_jar'];

// Raw, already URL-encoded POST body from the form's "vv" textarea.
// Surrounding whitespace is dropped so a trailing newline does not end up
// inside the last field (the captcha).
$post_fields = isset($_POST['vv']) ? trim($_POST['vv']) : '';
if ($post_fields === '') {
    exit();
}

$ch = curl_init('https://login.taobao.com/member/login.jhtml');
curl_setopt_array($ch, array(
    CURLOPT_HEADER         => 0,
    CURLOPT_USERAGENT      => "mozilla/4.0 (compatible; msie 8.0; windows nt 6.1; trident/4.0; qqwubi 133; embedded web browser from: http://bsalsa.com/; slcc2; .net clr 2.0.50727; .net clr 3.5.30729; .net clr 3.0.30729; tablet pc 2.0; .net4.0c; .net4.0e; infopath.3; media center pc 6.0)",
    CURLOPT_RETURNTRANSFER => 1,
    CURLOPT_POST           => 1,
    CURLOPT_POSTFIELDS     => $post_fields,
    // NOTE(review): peer verification is disabled, as in the original script;
    // enable it (with a CA bundle) before sending real credentials.
    CURLOPT_SSL_VERIFYPEER => false,
    // 2 is the only meaningful "on" value; 1 is no longer accepted by libcurl.
    CURLOPT_SSL_VERIFYHOST => 2,
    // Send the cookies collected while the captcha was requested; without
    // them the server cannot associate the captcha with this login.
    CURLOPT_COOKIEFILE     => $cookie_jar,
    // Save the cookies set by the login response so later requests can pass
    // the same jar via CURLOPT_COOKIEFILE to access pages as the logged-in user.
    CURLOPT_COOKIEJAR      => $cookie_jar,
));
$data = curl_exec($ch);
if ($data === false) {
    $data = 'cURL error: ' . curl_error($ch);
}
curl_close($ch);

echo $data;
exit;
?>
提取验证码(b.php,即上面表单页面中 iframe 加载的脚本):
<?php
// Prepares a Taobao login attempt: creates a cookie jar, reads the "umto"
// token from the login page, prints the resulting POST body, then posts it
// once (without a captcha) so the response contains the captcha image URL.
session_start();

$cookie_jar = tempnam("./temp/", "cookie");
if ($cookie_jar === false) {
    exit('Unable to create the cookie jar file');
}
// Remember the jar path in the session so a later request can reuse it.
$_SESSION['cookie_jar'] = $cookie_jar;

$post_fields = "action=authenticator&event_submit_do_login=anything&from=tb&fc=default&style=default&css_style=&tid=xor_1_000000000000000000000000000000_635045544 70a7c717f750278&support=000001&ctrlversion=1,0,0,7&logintype=3&minititle=&minipara=&pstrong=&llnick=&sign=&need_sign=&isignore=&full_redirect=&popid=&callback=&guf=&not_duplite_str=&need_user_id=&poy=xor_1_000000000000000000000000000000_625a424a45137c6f7a7f0b786d08&gvfdcname=&gvfdcre=&from_encoding=&tpl_redirect_url=http:www.taobao.com&tpl_username=xxx&tpl_password=xxx";

$login_url = 'https://login.taobao.com/member/login.jhtml';

// Options shared by both requests. Option arrays are combined with "+"
// because array_merge() would renumber the integer CURLOPT_* keys.
$common_options = array(
    CURLOPT_USERAGENT      => "mozilla/4.0 (compatible; msie 8.0; windows nt 6.1; trident/4.0; qqwubi 133; embedded web browser from: http://bsalsa.com/; slcc2; .net clr 2.0.50727; .net clr 3.5.30729; .net clr 3.0.30729; tablet pc 2.0; .net4.0c; .net4.0e; infopath.3; media center pc 6.0)",
    CURLOPT_HEADER         => 0,
    CURLOPT_RETURNTRANSFER => 1,
    // NOTE(review): peer verification is disabled, as in the original script;
    // enable it (with a CA bundle) before sending real credentials.
    CURLOPT_SSL_VERIFYPEER => false,
    CURLOPT_SSL_VERIFYHOST => 2,
);

// Step 1: fetch the login page and extract the hidden "umto" field.
$ch = curl_init($login_url);
curl_setopt_array($ch, $common_options);
$data = curl_exec($ch);
if ($data === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit('cURL error: ' . $error);
}
curl_close($ch);

// Case-insensitive, because the attribute casing on the live page may differ.
$umto = preg_match('#id="um_to" name="umto" value="(.*?)"/>#i', $data, $arr) ? $arr[1] : '';

$post_fields = "umto=" . $umto . "&" . $post_fields . "&tpl_checkcode=";
// Escape the value for HTML; otherwise the browser decodes sequences such as
// "&not" inside the textarea and the copied POST body is corrupted.
echo "<textarea cols=50 rows=10>" . htmlspecialchars($post_fields, ENT_QUOTES) . "</textarea><br/>";

// Step 2: post without a captcha; the response carries the captcha image.
$ch = curl_init($login_url);
curl_setopt_array($ch, array(
    CURLOPT_POST       => 1,
    CURLOPT_POSTFIELDS => $post_fields,
    CURLOPT_COOKIEJAR  => $cookie_jar,
    CURLOPT_COOKIEFILE => $cookie_jar,
) + $common_options);
$data = curl_exec($ch);
if ($data === false) {
    $error = curl_error($ch);
    curl_close($ch);
    exit('cURL error: ' . $error);
}
curl_close($ch);

if (preg_match('#img id="j_standardcode_m" src="(.*?)" data-src=#i', $data, $arr1)) {
    // The URL comes from HTML source, so it may already contain entities
    // (e.g. &amp;); double_encode=false keeps those intact.
    echo '<img src="' . htmlspecialchars($arr1[1], ENT_QUOTES, 'UTF-8', false) . '" />';
}
exit;
?>
【说明】:本文章由站长整理发布,文章内容不代表本站观点,如文中有侵权行为,请与本站客服联系(QQ:)!