类似案例:DrissionPage实现爬取51job

逻辑与之前的项目相似，此处不再赘述。

以下是基础代码框架：


from DrissionPage import WebPage
from DrissionPage import ChromiumOptions
import time

# Location of the local Microsoft Edge executable that DrissionPage should drive.
path = r'C:\Program Files (x86)\Microsoft\Edge\Application\msedge.exe'

# Record the browser path on an options object and save it.
# NOTE(review): save() presumably writes DrissionPage's default config so the
# argument-less WebPage() below picks the path up -- confirm against the docs.
edge_options = ChromiumOptions().set_browser_path(path)
edge_options.save()

wp = WebPage()

# The listener is started before the page is opened; the scraping loop that
# follows waits on it for packets whose URL matches this fragment.
wp.listen.start('/api/job/search-pc')
wp.get("https://we.51job.com/pc/search?jobArea=020000&keyword=python%20%E7%88%AC%E8%99%AB&searchType=2&degree=03&sortType=0&metro=")

# Throttling pause (seconds) applied before each wait and after each
# "next page" click, matching the pacing of the original script.
PAGE_DELAY_SECONDS = 2
# Maximum time (seconds) to wait for a search response. Without a limit the
# script blocks forever when a "next page" click triggers no new request
# (for example on the last page). The value is a judgement call; tune as needed.
LISTEN_TIMEOUT_SECONDS = 10

# Main scraping loop: take one captured search response per iteration, print
# one row per job in it, then click the "next page" arrow. The loop ends when
# no response arrives in time, the response has an unexpected structure, or
# the arrow cannot be clicked.
while True:
    time.sleep(PAGE_DELAY_SECONDS)
    packet = wp.listen.wait(timeout=LISTEN_TIMEOUT_SECONDS)
    if not packet:
        # wait() gave back a falsy result instead of a packet: nothing was
        # captured within the timeout, so assume there are no more pages.
        print('No search response captured before timeout; stopping.')
        break

    try:
        job_list = packet.response.body['resultbody']['job']['items']
    except (KeyError, TypeError) as e:
        # The body was not the expected nested mapping; report and stop
        # rather than crash with a bare traceback.
        print(f'Unexpected search response structure: {e!r}')
        break

    for job in job_list:
        try:
            data_row = [
                job['jobName'],
                job['jobAreaLevelDetail']['districtString'],
                job['jobSalaryMax'],
                job['jobSalaryMin'],
                job['companyName'],
                job['jobDescribe'],
            ]
        except (KeyError, TypeError) as e:
            # Skip only records that lack a field that is actually printed,
            # and say so instead of dropping them silently.
            print(f'Skipping job with missing field: {e!r}')
            continue
        print(data_row)

    try:
        wp.ele(".el-icon el-icon-arrow-right").click()
        time.sleep(PAGE_DELAY_SECONDS)
    except Exception as e:
        # Any failure to locate or click the arrow ends the crawl.
        print(e)
        break