本文共 1335 字,大约阅读时间需要 4 分钟。
import json
import re
import urllib.parse

import pandas as pd
import requests

# Weibo mobile endpoint queried by get_one_page().
_SEARCH_URL = 'https://m.weibo.cn/api/container/getIndex'

# Non-greedy match for anything shaped like an HTML tag; compiled once
# instead of being looked up on every loop iteration.
_HTML_TAG_RE = re.compile(r'<.*?>')


def _extract_cards(response_text):
    """Return the ``data.cards`` list of a JSON response body, or ``[]``."""
    try:
        payload = json.loads(response_text)
    except ValueError:
        # json.JSONDecodeError subclasses ValueError. An unparsable body is
        # treated as "no results", as the original best-effort code did.
        return []
    if not isinstance(payload, dict):
        return []
    data = payload.get('data')
    if not isinstance(data, dict):
        # Covers a missing key as well as an explicit JSON null.
        return []
    cards = data.get('cards')
    return cards if isinstance(cards, list) else []


def _card_to_entry(blog):
    """Build the (text, mid, reposts, comments, likes) tuple for one mblog."""
    text = _HTML_TAG_RE.sub('', blog['text'])  # strip HTML tags
    return (
        text,
        blog['mid'],
        blog['reposts_count'],
        blog['comments_count'],
        blog['attitudes_count'],
    )


def get_one_page(keyword='新冠肺炎', page=1, timeout=10):
    """Fetch one page of Weibo search results and extract the posts.

    Sends a GET request to the m.weibo.cn ``getIndex`` endpoint and keeps
    every returned card that carries an ``mblog`` entry.

    Args:
        keyword: Search term placed in the ``containerid`` query. The
            default reproduces the original hard-coded query.
        page: Value sent as the ``page`` request parameter.
        timeout: Seconds passed to ``requests.get`` so that a stalled
            connection cannot block forever.

    Returns:
        A list of ``(text, mid, reposts, comments, likes)`` tuples, where
        ``text`` is the post text with HTML tags removed and the other
        items are the ``mid``, ``reposts_count``, ``comments_count`` and
        ``attitudes_count`` values of the post. The list is empty when the
        response body is not JSON or has no usable ``data.cards`` list.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        KeyError: If an ``mblog`` entry lacks one of the fields read above.
    """
    params = {
        'containerid': '100103type=60&q={}&t=0'.format(keyword),
        'page_type': 'searchall',
        'page': page,
    }

    # Send the request.
    response = requests.get(_SEARCH_URL, params=params, timeout=timeout)
    response.encoding = 'utf-8'  # force UTF-8 decoding of the body

    # Parse the response and collect the post information.
    one_page_data = []
    for card in _extract_cards(response.text):
        # Skip cards that are not post cards.
        if not isinstance(card, dict) or 'mblog' not in card:
            continue
        one_page_data.append(_card_to_entry(card['mblog']))
    return one_page_data


# Script entry point.
if __name__ == '__main__':
    data = get_one_page()
    print(data)

# Key optimizations: (heading carried over from the article; the list that
# followed it is not present in this text)
请注意:由于代码直接调用了微博的 API,建议在实际使用前查阅微博的使用政策,确保符合相关法律法规。
转载地址:http://cwrf.baihongyu.com/