鍍金池/ 問答/Python/ Python使用requests下載文件問題

Python使用requests下載文件問題

最近在爬一個網(wǎng)站,想直接下載其中的torrent文件,發(fā)現(xiàn)該torrent文件在下載頁面是點(diǎn)擊下載按鈕,提交一個form表單到后臺,然后開始下載,使用python requests提交表單,但是下載下來的文件是論壇的首頁,不知道哪里出錯了,有大佬幫看看嗎?以下是代碼,老司機(jī)懂的.

def down_torrent(link):
    headers = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"
    "Accept-Encoding": "gzip, deflate"
    "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8"
    "Cache-Control": "max-age=0"
    "Connection": "keep-alive"
    "Host": "www3.uptorrentfilespacedownhostabc.info"
    "Upgrade-Insecure-Requests": "1"
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.360"
    }

    torrent_html = requests.get(link)
    torrent_html.encoding == 'utf-8'

    soup = BeautifulSoup(torrent_html.text, 'lxml')

    input_list = soup.find_all('input')

    down_parms = {}  #POST提交的參數(shù)
    for i in input_list:
        if i.get('name'):
            down_parms[i.get('name')] = i.get('value')
    
    #POST地址        
    post_link = link.split('file.php')[0] + 'down.php'
    
    #下載文件保存位置
    down_dir = os.path.join(current_path, down_parms['name'])
    
    if not os.path.exists(down_dir):
        os.mkdir(down_dir)
    down_path = os.path.join(down_dir, down_parms['name'])
    down_path = down_path + '.torrent'
    s = requests.Session()
    torrent_html = s.post(post_link, headers=headers, data=down_parms)
    with open(down_path, 'wb') as f:
        for chunk in torrent_html.iter_content(10):
            f.write(chunk)



if __name__ == '__main__':
    down_torrent('http://www3.uptorrentfilespacedownhostabc.info/updowm/file.php/P22OGZq.html')
回答
編輯回答
挽歌

恩,很黃很暴力,我很喜歡

import requests
from pyquery import PyQuery as Q

url = 'http://www3.uptorrentfilespacedownhostabc.info/updowm/file.php/P22OGZq.html'
post_url = 'http://www3.uptorrentfilespacedownhostabc.info/updowm/down.php'

session = requests.Session()
session.headers['Referer'] = url

r = session.get(url)
inputs = Q(r.text).find('input[type="hidden"]')
data = {Q(_).attr('name'): Q(_).attr('value') for _ in inputs}
r = session.post(post_url, data=data)

file_name = '{0}.torrent'.format(data['name'])
with open(file_name, 'wb') as f:
    f.write(r.content)
2018年4月22日 07:53
編輯回答
女流氓

我試了下,header里面必須有"Referer",否則會請求超時,但是并沒出現(xiàn)響應(yīng)是首頁的情況

import requests


def download():
    url = 'http://www3.uptorrentfilespacedownhostabc.info/updowm/down.php'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit \
            /537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image \
            /webp,image/apng,*/*;q=0.8',
        'Accept-Encoding': 'gzip, deflate',
        'Accept-Language': 'zh-CN, zh; q=0.9',
        'Referer': 'http://www3.uptorrentfilespacedownhostabc.info/updowm/file.php/P22OGZq.html',
        # 'Origin': 'http://www3.uptorrentfilespacedownhostabc.info',
        'Upgrade-Insecure-Requests': '1',
        'Host': 'www3.uptorrentfilespacedownhostabc.info',
    }
    post_data = {'type': 'torrent',
                'id': 'P22OGZq',
                'name': '55d19496b8995be1f2c1118daa47ba4c4c8b98ec'}

    response = requests.post(url, headers=headers, data=post_data)

    if response.status_code == 200:
        print(response.text)
    else:
        print(response.status_code)
2018年7月25日 23:33
編輯回答
歆久

樓上兩位大佬說的對,就是缺了Referer,加上了就好可以了.哎,營養(yǎng)要跟不上了!!

2018年5月11日 01:10