| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 | import sysimport time# from sys import platform#!/usr/bin/python# -*- coding: utf8 -*-from bs4 import BeautifulSoupimport requestsimport timestart=time.time()fi=open("linksthethao.txt","r")fo=open("baothethao2.txt","w")i=0for line in fi.readlines():    link=line.strip()    url=requests.get(link)    t_soup = BeautifulSoup(url.text, 'lxml')    for headline in t_soup.findAll('h1', {'class': 'details__headline cms-title'}):        fo.write(headline.text)        fo.write('\n')    for description in t_soup.findAll('div', {'class': 'sapo cms-desc'}):        fo.write(description.text)        fo.write('\n')    str = ''    for contents in t_soup.findAll('div', {'class': 'cms-body detail'}):        for content in contents.findAll('p'):            fo.write(content.text)            fo.write('\n')    i+=1    print(i)    if (i==50): breakfi.close()fo.close()print("--- %s seconds ---" % (time.time() - start))
 |