您现在的位置是: 网站首页>文章详情 文章详情

PHP招聘网爬虫源码分享

Heartless Wolf 1562545962 php 13644 收藏

简介 PHP招聘网爬虫源码分享

<?php

// Load the Composer autoloader. A forward slash is used because the previous
// backslash separator ('vendor\autoload.php') only resolves on Windows.
require 'vendor/autoload.php';

use QL\QueryList;

// Crawl the listing pages one by one: scrape the list, enrich each row from
// its detail page, then append the rows to the SQL dump.
// The bound `$i < 2` means only page 1 is processed; raise it to crawl more.
for ($i = 1; $i < 2; $i++) {
    $tmp_arr = get_info("https://www.zhipin.com/c101180100-p100103/?page={$i}");

    $final_arr = make_data($tmp_arr);

    make_sql($final_arr);
}



/**
 * Scrape one job-listing page and return the extracted rows.
 *
 * The page at $url is fetched through QueryList and every rule below is
 * applied to it. Each rule maps a result key to a CSS selector plus the part
 * of the matched node to read ('text', 'html' or the 'href' attribute).
 *
 * @param string $url Address of the listing page to fetch.
 *
 * @return mixed Whatever `getData()->all()` yields; make_data() iterates it as
 *               rows indexed by the rule keys declared here.
 */
function get_info($url = '')
{
    // Note: 'tmp_experience' and 'tmp_education' deliberately read the same
    // paragraph; make_data() later picks different segments out of it.
    $rules = [
        'job' => ['div.info-primary > h3 > a > div.job-title', 'text'],
        'company' => ['div.info-company > div > h3 > a', 'text'],
        'tmp_experience' => ['div.info-primary > p', 'html'],
        'salary' => ['.red', 'text'],
        'tmp_education' => ['div.info-primary > p', 'html'],
        'tmp_people' => ['div.info-company > div > p', 'html'],
        'link' => ['div.info-primary > h3 > a', 'href'],
    ];

    // Fetch the page, register the extraction rules, run the query.
    $collected = QueryList::get($url)
        ->rules($rules)
        ->query()
        ->getData();

    return $collected->all();
}



/**
 * Enrich the rows scraped from a listing page with data from each job's
 * detail page and split the raw HTML fragments into individual fields.
 *
 * For every row this adds the keys 'require', 'detail_address', 'type',
 * 'city', 'experience', 'education' and 'people'. The original keys
 * (including the 'tmp_*' fragments) are left in place.
 *
 * @param array $data Rows as produced by get_info(); each row must carry
 *                    'link', 'tmp_experience', 'tmp_education' and
 *                    'tmp_people'.
 *
 * @return array The same rows, in the same order and under the same keys,
 *               with the extra fields filled in.
 */
function make_data($data)
{
    // Markup the listing page puts between the segments of an info paragraph.
    $separator = '<em class="vline"></em>';

    foreach ($data as $key => $value) {
        // Download the detail page once and run both lookups against it; the
        // previous code issued two identical HTTP requests per job.
        $detail_page = QueryList::get('https://www.zhipin.com' . $value['link']);

        $value['require'] = $detail_page
            ->find('#main > div.job-box > div > div.job-detail > div.detail-content > div:nth-child(1) > div')
            ->text();

        $value['detail_address'] = $detail_page->find('div.location-address')->text();

        // Fixed labels for this crawl (the listing URL targets one city and
        // one job category).
        $value['type'] = 'php';

        $value['city'] = '郑州';

        // Second segment of the info paragraph holds the experience, third
        // the education. Fall back to '' when the markup has fewer segments
        // instead of triggering an undefined-offset notice.
        $experience_parts = explode($separator, $value['tmp_experience']);
        $value['experience'] = isset($experience_parts[1]) ? $experience_parts[1] : '';

        $education_parts = explode($separator, $value['tmp_education']);
        $value['education'] = isset($education_parts[2]) ? $education_parts[2] : '';

        // Company size is the last segment; explode() always returns at
        // least one element, so the index is always valid.
        $people_parts = explode($separator, $value['tmp_people']);
        $value['people'] = $people_parts[count($people_parts) - 1];

        $data[$key] = $value;
    }

    return $data;
}



/**
 * Append one INSERT statement per row to tmp_sql.sql in the current working
 * directory.
 *
 * Values are written in the fixed order job, company, city, type,
 * detail_address, require, experience, salary, education, people. The
 * statement names no columns, so the `jobs` table must declare its columns in
 * that order.
 *
 * @param array $data Rows as produced by make_data().
 *
 * @return void
 *
 * @throws \RuntimeException When tmp_sql.sql cannot be opened for appending.
 */
function make_sql($data)
{
    // Order in which the fields appear inside values(...).
    $fields = [
        'job',
        'company',
        'city',
        'type',
        'detail_address',
        'require',
        'experience',
        'salary',
        'education',
        'people',
    ];

    // The values are scraped from a third-party site, so they are untrusted:
    // an unescaped quote would break the statement or inject SQL. Quotes are
    // doubled (standard SQL) and backslashes are doubled as well.
    // NOTE(review): backslash doubling assumes the dump is loaded into MySQL
    // with its default backslash-escape mode - confirm the target database.
    $escape = function ($text) {
        return strtr((string) $text, ['\\' => '\\\\', "'" => "''"]);
    };

    $file = fopen('tmp_sql.sql', 'a');

    if ($file === false) {
        // Fail loudly rather than silently dropping every scraped row.
        throw new \RuntimeException('Unable to open tmp_sql.sql for appending');
    }

    try {
        foreach ($data as $value) {
            $quoted = [];

            foreach ($fields as $field) {
                $raw = isset($value[$field]) ? $value[$field] : '';
                $quoted[] = "'" . $escape($raw) . "'";
            }

            $sql = 'insert into jobs values(' . implode(',', $quoted) . ");\n";

            fwrite($file, $sql);
        }
    } finally {
        // Release the handle even if a write fails part-way through.
        fclose($file);
    }
}






文章评论

    点击加载更多评论

我的名片

网名:Hello World

职业:PHP开发

现居:福建省-福州市

Email:565554856@qq.com

  • 图片信息

站点信息

  • 建站时间:2019-04-20
  • 文章总计:69条
  • 笔记总计:4条
  • 文章评论:0条
  • 笔记评论:0条
  • 当前访问IP:3.144.69.178