# Python web-scraping example: fetch the Maoyan Movies Top 100 board.

from urllib import request,error
import re,time,json
def getPage(url, timeout=10):
    """Fetch the page at ``url`` and return its body decoded as UTF-8.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait on blocking network operations before
            giving up. Without a timeout a stalled server would hang the
            caller indefinitely.

    Returns:
        The decoded page text when the response code is 200; ``None`` when
        the code is anything else or the request fails with a network error.
    """
    # Send a browser-like User-Agent header with the request.
    headers = {
        'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.130 Safari/537.36'
    }
    req = request.Request(url, headers=headers)
    try:
        # The context manager closes the response (and its connection) on
        # every exit path, including early returns and exceptions.
        with request.urlopen(req, timeout=timeout) as res:
            if res.code != 200:
                return None
            return res.read().decode("utf-8")
    except OSError:
        # URLError and HTTPError are OSError subclasses, as are the socket
        # timeout / connection errors that can surface while reading the body.
        return None

def parsePage(html):
    """Extract movie entries from board-page HTML, print them, and return them.

    Args:
        html: Page markup as a string. ``None`` or an empty string is
            treated as a page with no entries.

    Returns:
        A list of 4-tuples of strings, one per match in document order:
        the link's ``href`` value, the link text, the text inside
        ``<p class="star">`` and the text inside ``<p class="releasetime">``.
        The list is empty when nothing matches.
    """
    if not html:
        # A failed download yields None; report "no entries" instead of
        # letting re.findall raise TypeError.
        items = []
    else:
        # Raw string so that \s reaches the regex engine without relying on
        # Python's handling of unrecognised string escapes.
        pat = r'<p class="name"><a.*?href="(.*?)"[^<>]*>(.*?)</a></p>\s+<p class="star">\s+(.*?)\s+</p>\s+<p class="releasetime">(.*?)</p>'
        # re.S lets '.' span newlines, since the fields sit on separate lines.
        items = re.findall(pat, html, re.S)
    print(items)
    return items

# Script body: download the board page and print the entries parsed from it.
url = "https://maoyan.com/board/4"
html = getPage(url)
if html is None:
    # getPage returns None when the download fails; parsing None would raise
    # TypeError, so report the failure instead.
    print("Failed to fetch " + url)
else:
    parsePage(html)
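# "打赏赞" ("tip / like") -- leftover button text from the web page this script was copied from; not code.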