抖音搜索视频采集

[Python] 纯文本查看 复制代码

"""Download the videos returned by a Douyin web search.

Usage::

    DOUYIN_COOKIE='<cookie header copied from your own browser>' python spider.py [offset]

``offset`` is optional and defaults to 0. It is the number of search results
to skip and advances by one page (10 results) per request, so after an
interrupted run the crawl can be resumed by passing the offset that was
reported when it stopped.
"""
import os
import sys
import time

import requests


def string_to_dict(cookie_string):
    """Parse a ``Cookie`` header string into a ``{name: value}`` dict.

    Pairs are separated by ``;``. Only the first ``=`` of a pair splits the
    name from the value, so values that themselves contain ``=`` are kept
    intact. Fragments without any ``=`` are ignored.
    """
    cookie_dict = {}
    for cookie in cookie_string.split(';'):
        key, sep, value = cookie.strip().partition('=')
        if sep:
            cookie_dict[key] = value
    return cookie_dict


# SECURITY: the cookie is a logged-in session credential, so it is read from
# the environment instead of being hard-coded in the source (and it is no
# longer printed at import time). Use the cookie produced by a search page in
# your own browser.
COOKIE_ENV_VAR = 'DOUYIN_COOKIE'
cookie_str = os.environ.get(COOKIE_ENV_VAR, '').strip()
cookies = string_to_dict(cookie_str)

# Device fingerprint captured from the browser session; it is sent both as a
# request header and as a query parameter.
UIFID = '3af258ad659545d9553f15cf32bb8a88df248991ebb865c20b5fa6f7dab6eb54e8d56c91612405705c23ef3169c754057359a8de37c8fd82c06b56053b9959d15d41fa3acf61ab30ac9b68a4d6560f5b522876f27befff69f38fe90a10431891784e6f60feb4a69d5369417c250b3fe1dc3cce6465b0755720c4b3ecd50009900a9692dd076221d8ed878bc103da6bf910d0cfd5b168c5fcac47ba2732e629e1'

# Characters that must not appear in a file name, each mapped to '_'.
_FILENAME_TABLE = str.maketrans({ch: '_' for ch in '<>:"/\\|?*\n\r\t'})


class Spider(object):
    """Search Douyin for a keyword and save every returned video as an .mp4.

    Attributes:
        os_path: Output directory (next to this file), with a trailing slash.
        page_size: Number of results requested per page.
        request_timeout: Timeout in seconds applied to every HTTP request.
        downloaded: Number of videos saved successfully by this instance.
    """

    os_path = os.path.dirname(os.path.abspath(__file__)) + '/抖音搜索视频/'
    page_size = 10
    request_timeout = 30
    downloaded = 0

    def __init__(self, key=None):
        """Prepare the output directory, the request headers and the keyword.

        Args:
            key: Search keyword. When omitted the user is prompted for it,
                which is what the no-argument ``Spider()`` call always did.
        """
        # Created on first use rather than at import time; exist_ok avoids the
        # check-then-create race.
        os.makedirs(self.os_path, exist_ok=True)
        self.url = 'https://www.douyin.com/aweme/v1/web/search/item/'
        self.headers = {
            'accept': 'application/json, text/plain, */*',
            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
            'cache-control': 'no-cache',
            'pragma': 'no-cache',
            'priority': 'u=1, i',
            'referer': 'https://www.douyin.com/root/search/%E5%BC%A0%E4%BC%9F?type=video',
            'sec-ch-ua': '"Microsoft Edge";v="135", "Not-A.Brand";v="8", "Chromium";v="135"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"Windows"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'uifid': UIFID,
            'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36 Edg/135.0.0.0',
            'Cookie': cookie_str,
        }
        if key is None:
            key = input('请输入您想采集的内容: ')
        self.key = str(key)

    @staticmethod
    def _safe_filename(name, max_length=80):
        """Return *name* made safe for use as a file name (may be empty).

        Reserved characters and line breaks become ``_``; the result is
        trimmed, capped at *max_length* characters so that long descriptions
        do not exceed file-name limits, and stripped of trailing dots/spaces.
        """
        cleaned = str(name).translate(_FILENAME_TABLE).strip()
        return cleaned[:max_length].rstrip(' .')

    def _build_params(self, page_number):
        """Build the query parameters for the results starting at *page_number*."""
        return (
            ('device_platform', 'webapp'),
            ('aid', '6383'),
            ('channel', 'channel_pc_web'),
            ('search_channel', 'aweme_video_web'),
            ('enable_history', '1'),
            ('keyword', self.key),
            ('search_source', 'normal_search'),
            ('query_correct_type', '1'),
            ('is_filter_search', '1'),
            # -----------------------------------
            ('sort_type', '1'),
            ('publish_time', '0'),
            ('from_group_id', ''),
            ('offset', page_number),
            ('count', str(self.page_size)),
            ('need_filter_settings', '1'),
            ('list_type', 'multi'),
            ('update_version_code', '170400'),
            ('pc_client_type', '1'),
            ('pc_libra_divert', 'Windows'),
            ('support_h265', '1'),
            ('support_dash', '1'),
            ('version_code', '170400'),
            ('version_name', '17.4.0'),
            ('cookie_enabled', 'true'),
            ('screen_width', '1707'),
            ('screen_height', '960'),
            ('browser_language', 'zh-CN'),
            ('browser_platform', 'Win32'),
            ('browser_name', 'Edge'),
            ('browser_version', '135.0.0.0'),
            ('browser_online', 'true'),
            ('engine_name', 'Blink'),
            ('engine_version', '135.0.0.0'),
            ('os_name', 'Windows'),
            ('os_version', '10'),
            ('cpu_core_num', '32'),
            ('device_memory', '8'),
            ('platform', 'PC'),
            ('downlink', '10'),
            ('effective_type', '4g'),
            ('round_trip_time', '50'),
            ('webid', '7474844642175010344'),
            ('uifid', UIFID),
            # Author's note: 'msToken' and 'a_bogus' change dynamically, but a
            # value captured once reportedly keeps working because the server
            # barely validates them; they can also be left out entirely.
            ('msToken', 'yJ3OCyQmLkA3aYgRBx4gEVXtArb0A68nWhwJneWw-OOjdFLqZGeNcovnGg9P8_fAqKJGiYCZ761Fl8QV84CMjju1_LSCbefoH_DcswS2-3B0cUcRc6uX5SJdzU5NlvT3w5T3DcMnBlttd9yzIlgvj7F76SmQ0kY20KxpALs7e8UVBrReUA=='),
            ('a_bogus', 'mJsRgtWjxd/VPdFbmcTGt-3lrAgANP8yKpTdWCPT9NFEGwtcE8PIpxSXJxuPm7WR4mBkioNH4d-AafdcQTX0Z99kqmpkSwUSe42IVUfoMqiIbtwkLrfhSL8zqwBSUbwil/nWiIW5Us0EIEI5INAmApIGC5zLQmg2SqZCp2Y9JDSWps6TVn/1Ca26'),
        )

    def parse_start_url(self, page_number):
        """Fetch one page of search results and download its videos.

        Args:
            page_number: Result offset (0, 10, 20, ...), not a page index.

        Returns:
            The number of result items on the page; 0 means there is nothing
            (more) to fetch and the caller should stop paging.

        Raises:
            requests.RequestException: On network failure, timeout or a
                non-2xx status.
            ValueError: If the response body is not valid JSON.
        """
        response = requests.get(
            self.url,
            headers=self.headers,
            params=self._build_params(page_number),
            cookies=cookies,
            timeout=self.request_timeout,
        )
        response.raise_for_status()
        payload = response.json()
        # print(payload)
        items = payload.get('data') or []
        self.save_data(payload)
        return len(items)

    def save_data(self, response):
        """Download every video listed under ``response['data']``.

        Items that lack ``aweme_info`` or a playable URL are skipped instead
        of aborting the whole page.

        Returns:
            The number of videos saved successfully.
        """
        saved = 0
        for data in response.get('data') or []:
            info = data.get('aweme_info')
            if not info:
                continue
            try:
                mp4_url = info['video']['play_addr']['url_list'][-1]
            except (KeyError, IndexError, TypeError):
                continue
            # Name the file after the description, falling back to the video
            # id when the description is empty or sanitises to nothing.
            name = (
                self._safe_filename(info.get('desc') or '')
                or self._safe_filename(info.get('aweme_id') or 'video')
            )
            print(name, mp4_url)
            if self.save_video(name, mp4_url):
                saved += 1
        return saved

    def save_video(self, name, mp4_url):
        """Stream *mp4_url* into ``<os_path><name>.mp4``.

        The body is written in chunks so a large video is never held in
        memory as a whole. A failed download removes the partial file and is
        reported rather than raised, so one bad video does not end the crawl.

        Returns:
            True if the file was written completely, False otherwise.
        """
        target = self.os_path + name + '.mp4'
        try:
            with requests.get(mp4_url, headers=self.headers, stream=True,
                              timeout=self.request_timeout) as resp:
                resp.raise_for_status()
                with open(target, 'wb') as f:
                    for chunk in resp.iter_content(chunk_size=1 << 20):
                        f.write(chunk)
        except (requests.RequestException, OSError) as exc:
            if os.path.exists(target):
                os.remove(target)
            print(f"视频 :{name} - - - 下载失败: {exc}", '\n\n')
            return False
        self.downloaded += 1
        print(f"视频 :{name} - - - 下载完成", '\n\n')
        return True


def main():
    """Crawl page by page until a page comes back empty."""
    if not cookie_str:
        raise SystemExit(f'请先设置环境变量 {COOKIE_ENV_VAR} (使用自己浏览器搜索页产生的 cookies)')
    # Optional resume offset: a multiple of 10, as reported by an earlier
    # interrupted run.
    page_number = int(sys.argv[1]) if len(sys.argv) > 1 else 0
    spider = Spider()
    try:
        while spider.parse_start_url(page_number):
            page_number += spider.page_size
    except (requests.RequestException, ValueError) as exc:
        # ValueError covers a non-JSON response body.
        print(f'采集中断: {exc}')
        print(f'已下载{spider.downloaded}个视频, 可从 offset={page_number} 继续')
        raise SystemExit(1)
    print(f'已下载{spider.downloaded}个视频')


if __name__ == '__main__':
    main()

2025-07-03 04:01 点击量:1