# Suggest_news.py (1.8 KB)
  1. import time
  2. import re
  3. # from sys import platform
  4. #!/usr/bin/python
  5. # -*- coding: utf8 -*-
  6. from bs4 import BeautifulSoup
  7. from bs4 import element
  8. import bs4
  9. import requests
  10. start=time.time()
  11. news={}
  12. #news[0]={'title':'','link':'','content':''}
  13. def scraping_soup(link, category, page):
  14. url = requests.get(link)
  15. if (page=="vnexpress"):
  16. soup = BeautifulSoup(url.content, 'lxml')
  17. else:
  18. soup = BeautifulSoup(url.content, 'html.parser')
  19. items = soup.findAll('item')
  20. i=0
  21. for item in items:
  22. title = item.title.text
  23. link = item.guid.text
  24. description = item.description.text
  25. print(title)
  26. #--------
  27. news[len(news)] = {'title': title, 'link': link, 'content': title + ' ' + description, 'category': category, 'page':page}
  28. i+=1
  29. if i==30: break
  30. #def Suggest_news_thethao():
  31. #scraping_soup('https://vnexpress.net/rss/the-thao.rss','thethao', 'vnexpress')
  32. def Suggest_news_thoisu_chinhtri():
  33. scraping_soup('https://vnexpress.net/rss/thoi-su.rss', 'thoisu', 'vnexpress')
  34. #scraping_soup('https://vtv.vn/trong-nuoc/chinh-tri.rss', 'thoisu')
  35. scraping_soup('https://toquoc.vn/rss/thoi-su-1.rss', 'thoisu','toquoc')
  36. #scraping_soup('https://baotintuc.vn/thoi-su.rss', 'thoisu', 'baotintuc')
  37. #scraping_soup('https://vietnamnet.vn/rss/thoi-su.rss', 'thoisu', 'vietnamnet')
  38. # scraping_soup('https://laodong.vn/rss/thoi-su.rss', 'thoisu', 'laodong')
  39. #def Suggest_news_vanhoa():
  40. #scraping_soup('https://toquoc.vn/rss/van-hoa-10.rss', 'vanhoa', 'toquoc')
  41. #scraping_soup('https://baotintuc.vn/van-hoa.rss', 'vanhoa', 'baotintuc')
  42. #scraping_soup('https://laodong.vn/rss/van-hoa-giai-tri.rss', 'vanhoa', 'laodong')
  43. #Suggest_news_thethao()
  44. Suggest_news_thoisu_chinhtri()
  45. #Suggest_news_vanhoa()
  46. print(news)