img
img
img

既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上大数据知识点,真正体系化!

由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新

需要这份系统化资料的朋友,可以戳这里获取

# Start the Chrome session shared by the rest of the script; `options` is
# configured earlier in the file.
browser = webdriver.Chrome(options=options)
# Register a script via the DevTools protocol that redefines
# navigator.webdriver so it reads as undefined, hiding the automation flag
# from pages opened afterwards.
browser.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
    "source": """
Object.defineProperty(navigator, 'webdriver', {
get: () => undefined
})
"""
})

def getData(url: str) -> None:
    """Crawl a job-search result URL and append every job card to 数据1.csv.

    Opens ``url`` in the module-level ``browser``, then walks at most 10
    result pages. For each ``.job-card-wrapper`` element it reads the listing
    fields, opens the detail page to read the job description, and appends
    one row (no header row is written) to ``数据1.csv`` encoded as utf-8-sig.
    Crawling stops early when a page contains no job cards.

    Args:
        url: Search-result page to open; the search keyword and city are
            expected to be part of the URL already.

    Returns:
        None. Results are written to the CSV file and echoed to stdout.

    Raises:
        Selenium exceptions (for example when an element is missing) are not
        handled here and propagate to the caller; only UnicodeEncodeError is
        swallowed per card.
    """
    # Every structural selector below shares this path down to the result list.
    result_root = ("#wrap > div.page-job-wrapper > div.page-job-inner > div > "
                   "div.job-list-wrapper > div.search-job-result > ")
    detail_link_selector = result_root + "ul > li > div.job-card-body.clearfix > a"
    fieldnames = ['address', 'job_name', 'company_name', 'company_type', 'company_people', 'salary',
                  'experience', 'education', 'skills', 'benefits', 'job_desc']

    # Open the result page.
    browser.get(url)
    browser.implicitly_wait(10)
    # The browser window is minimized while crawling.
    browser.minimize_window()
    # Fixed 10-second pause so the first result page can finish loading.
    time.sleep(10)

    for page in range(1, 11):
        browser.implicitly_wait(10)
        cards = browser.find_elements(By.CSS_SELECTOR, '.job-card-wrapper')
        time.sleep(2)
        print("开始爬取第" + str(page) + "页信息")

        if not cards:
            print('没有内容,停止运行')
            break

        for card in cards:
            try:
                time.sleep(2)
                # Job location.
                address = card.find_element(
                    By.CSS_SELECTOR,
                    '.job-card-wrapper .job-card-left .job-area-wrapper .job-area').text
                # Job title.
                job_name = card.find_element(
                    By.CSS_SELECTOR, '.job-card-wrapper .job-card-left .job-name').text
                # Company name.
                company = card.find_element(
                    By.CSS_SELECTOR, '.job-card-wrapper .job-card-right .company-name a').text
                # Company type: first entry of the company-info list.
                company_type = card.find_element(
                    By.CSS_SELECTOR,
                    result_root + 'ul > li > div.job-card-body.clearfix > div > '
                                  'div.company-info > ul > li:nth-child(1)').text
                # Company size: last entry of the company-info list.
                company_people = card.find_element(
                    By.CSS_SELECTOR,
                    result_root + 'ul > li> div.job-card-body.clearfix > div > '
                                  'div.company-info > ul > li:last-child').text
                # Salary.
                money = card.find_element(
                    By.CSS_SELECTOR, '.job-card-wrapper .job-card-left .salary').text
                # Required experience: first tag of the tag list.
                experience = card.find_element(
                    By.CSS_SELECTOR,
                    '.job-card-wrapper .job-card-left .tag-list :first-child').text
                # Required education: the tag following the first one. A tag
                # containing '月' is not an education level, so the value
                # falls back to '本科'.
                education = card.find_element(
                    By.CSS_SELECTOR, '.job-card-wrapper .job-card-left .tag-list li+li').text
                if '月' in education:
                    education = '本科'
                else:
                    education = education.strip('\n')
                # Skill tags: keep the non-empty ones, stored as the string
                # representation of a list.
                skill_elements = card.find_elements(
                    By.CSS_SELECTOR,
                    result_root + 'ul > li > div.job-card-footer.clearfix > ul > li')
                skill = str([tag.text for tag in skill_elements if len(tag.text) != 0])
                # Benefits.
                benefit = card.find_element(By.CSS_SELECTOR, '.job-card-wrapper .info-desc').text

                # Open the detail page. If a login dialog intercepts the
                # click, close the dialog and click again.
                try:
                    card.find_element(By.CSS_SELECTOR, detail_link_selector).click()
                except ElementClickInterceptedException:
                    print("正在关闭弹窗")
                    browser.find_element(
                        By.CSS_SELECTOR,
                        ".boss-login-dialog-content .boss-login-dialog-header .boss-login-close").click()
                    print("关闭成功")
                    card.find_element(By.CSS_SELECTOR, detail_link_selector).click()
                time.sleep(5)
                # Switch to the most recently opened tab (the detail page).
                browser.switch_to.window(browser.window_handles[-1])
                job_details = browser.find_element(
                    By.XPATH, '//*[@id="main"]/div[3]/div/div[2]/div[1]/div[2]').text
                time.sleep(1)
                # Close the detail tab and return to the remaining tab.
                browser.close()
                browser.switch_to.window(browser.window_handles[-1])

                job_desc = job_details.replace("\n", "")

                # Persist the row before echoing it: printing can raise
                # UnicodeEncodeError on consoles that cannot encode the text,
                # and the row must not be lost when that happens.
                with open('数据1.csv', 'a+', newline='', encoding='utf-8-sig') as csvfile:
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
                    writer.writerow({'address': address, 'job_name': job_name, 'company_name': company,
                                     'company_type': company_type, 'company_people': company_people,
                                     'salary': money, 'experience': experience, 'education': education,
                                     'skills': skill, 'benefits': benefit, 'job_desc': job_desc})

                # Echo the row to stdout.
                print(','.join([address, job_name, company, company_type, company_people,
                                money, experience, education, skill, benefit, job_desc]))
                time.sleep(10)
            except UnicodeEncodeError:
                # Text that cannot be encoded: move on to the next card.
                continue

        time.sleep(10)
        # Scroll down so the pager is reachable, imitating a human visitor.
        browser.execute_script('window.scrollTo(0,2000)')
        time.sleep(3)
        # The last link of the pager leads to the next page.
        browser.find_element(By.CSS_SELECTOR,
                             result_root + "div > div > div > a:last-child").click()
        time.sleep(8)

if __name__ == '__main__':
    # Search keywords to crawl. Other keywords noted by the author: 数据开发,
    # 数据分析, ETL, 数据仓库, 数据挖掘, "ETL工程师", "数据仓库".
    job_name = ["数据挖掘"]
    # City codes passed as the `city` query parameter. The author's city list
    # is: Beijing, Shanghai, Guangzhou, Shenzhen, Hangzhou, Tianjin, Xi'an,
    # Suzhou, Wuhan, Xiamen, Changsha, Chengdu, Zhengzhou, Chongqing.
    # Codes currently disabled (presumably the first eight cities, in order):
    # "101010100", "101020100", "101280100", "101280600",
    # "101210100", "101030100", "101110100", "101190400"
    place = ["101200100", "101230200", "101250100", "101270100", "101180100", "101040100"]

    for job in job_name:
        print("开始爬取" + str(job) + "的岗位信息")
        # `i` counts the cities finished for the current keyword.
        for i, p in enumerate(place, start=1):
            job_url = "https://www.zhipin.com/web/geek/job?query=" + job + "&city=" + p
            getData(job_url)
            print(str(job) + "的第" + str(i) + "所城市爬取完成")
        print(str(job) + "岗位爬取完成")

 结果展示:


![](https://img-blog.csdnimg.cn/4e85c1b0b39a472c872cadf8debc3293.png)


 


## 2.数据分析



package Job.DataProcess

import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, desc, round}

/**

  • 数据开发岗位
    */
    object DataDev {
    def main(args: Array[String]): Unit = {

    val spark = SparkSession.builder()
    .master(“local[4]”)
    .appName(“Test”)
    .getOrCreate()

    val data = spark.read
    .option(“header”, value = true)
    .option(“delimiter”, “,”)
    .option(“inferSchema”, value = true)
    .csv(“file:\D:\桌面文件\毕设\数据\招聘数据.csv”)

img
img

网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。

需要这份系统化资料的朋友,可以戳这里获取

一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!

网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。

需要这份系统化资料的朋友,可以戳这里获取

一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟或是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!

Logo

永洪科技,致力于打造全球领先的数据技术厂商,具备从数据应用方案咨询、BI、AIGC智能分析、数字孪生、数据资产、数据治理、数据实施的端到端大数据价值服务能力。

更多推荐