鍍金池/ 問答/Python  HTML/ 【python小白】寫爬蟲代碼遇到問關(guān)于異常處理的問題

【python小白】寫爬蟲代碼遇到問關(guān)于異常處理的問題

代碼如下:

# -*- coding:utf-8 -*-
from urllib.request import urlopen
import bs4
import webbrowser
import requests

html_list = [] #存放要爬的所有網(wǎng)頁
html_list_txt = [] #存放要爬的所有網(wǎng)頁的源代碼
movie_list = [] #存放所有電影
#movie_total = {} #存放所有電影資料的字典

#def add_movie():


html = 'https://movie.douban.com/top250'
html_list.append(html)
#html2 = requests.get('https://movie.douban.com/top250')
#webbrowser.open(html)
#webbrowser.open(html)
html_txt = (urlopen(html)).read()
#html_txt = (requests.get(html)).text

bsObj = bs4.BeautifulSoup(html_txt, 'html.parser')
print('---1---')
html_div = bsObj.find('div',{'class':'paginator'})
print('---2---')
html_a = html_div.findAll('a')
print('---3---')
for html_a_temp in html_a:
#    print(type(html_a_temp))
    #i = 2
   #while i <= 9:
    #print(html_a_temp.get_text())
    #if html_a_temp.get_text != '后頁>':
        html_href = html_a_temp.attrs['href']
        html_ + html_href
        html_list.append(html_href)
        #i += 1
    
print('---4---')
html_list = list(set(html_list))
print(len(html_list))
#print(html_set)
#print(len(html_set))
#print(set(html_list))
#print(type('后頁'))

#上面的代碼為找到所有的鏈接
for html_list_temp in html_list:
    '''把所有要爬的鏈接全部轉(zhuǎn)換成源代碼并存儲'''
    html_read = bs4.BeautifulSoup(urlopen(html_list_temp).read(), 'html.parser')
    html_list_txt.append(html_read)

for html_page in html_list_txt:
    name_div_list = html_page.findAll('div',{'class':'info'})
    for name_div_temp in name_div_list:
        movie_total = {} #創(chuàng)建一個局部變量的字典,用來存放臨時找到的某個電影的資料
        name_div_inside = name_div_temp.findAll('div')

        movie_name = name_div_inside[0].a.span.get_text()  #找到電影名字
        name_div_star = name_div_temp.find('div',{'class':'star'})
        name_div_star_span = name_div_star.findAll('span')
        movie_score = name_div_star_span[1].get_text()  #找到電影評分
        movie_number = name_div_star_span[3].get_text()  #找到電影評價人數(shù)
        # ------分隔符-----
        try:
            movie_introduction = name_div_temp.find('span',{'class':'inq'}).get_text()#找到電影簡評
        except AttributeError:
            print("這部電影沒有簡評~~~~~~~~~~~")
        print(movie_introduction)
        #name_span_inq = name_div_temp.findAll('p')[1].span.get_text()
        #movie_introduction = name_span_inq

        #name_span_inq = name_div_temp.find('span',{'class':'inq'})
        #movie_introduction = name_span_inq.get_text()  #找到電影簡評
        movie_total['name'] = movie_name
        movie_total['score'] = movie_score
        movie_total['number'] = movie_number
        movie_total['introduction'] = movie_introduction
        movie_list.append(movie_total)
print(movie_list)

'''
        name_div_inside_span_list = name_div_inside[1].div.findAll('span')
        for name_div_inside_span_temp in name_div_inside_span_list:
            movie_score = name_div_inside[1].div.span[1].get_text() #找到電影評分
            movie_number = name_div_inside[1].div.span[3].get_text() #找到評價人數(shù)
'''
#movie_total[name] = movie_name
'''
    name_div_list = html_page.findAll('div',{'class':'hd'})
    for name_div_temp in name_div_list:
        movie_name = name_div_temp.a.span.get_text()
        movie_name_list.append(movie_name)
'''
#print(movie_name_list)

運(yùn)行結(jié)果如下:

clipboard.png
藍(lán)色箭頭位置,拋出了異常處理,但是緊接著,前一部電影的簡評又被打印了一次。這是怎么回事呢?

clipboard.png

clipboard.png

回答
編輯回答
純妹

try:這里的冒號是全角

2018年1月27日 17:47