RSStest.py 1012 B

123456789101112131415161718192021222324252627282930313233343536373839
  1. from bs4 import BeautifulSoup
  2. import requests
  3. import time
  4. start=time.time()
  5. def crawl():
  6. f = open("testingrss.txt", "w")
  7. url = requests.get('https://vnexpress.net/rss/the-thao.rss')
  8. soup = BeautifulSoup(url.content, 'xml')
  9. items = soup.find_all('item')
  10. for item in items:
  11. title = item.title.text
  12. print(title + '\n')
  13. # -------------------------------------------
  14. url = requests.get('https://vnexpress.net/rss/thoi-su.rss')
  15. soup = BeautifulSoup(url.content, 'xml')
  16. items = soup.find_all('item')
  17. for item in items:
  18. title = item.title.text
  19. print(title + '\n')
  20. # -------------------------------------------
  21. url = requests.get('https://vnexpress.net/rss/giao-duc.rss')
  22. soup = BeautifulSoup(url.content, 'xml')
  23. items = soup.find_all('item')
  24. for item in items:
  25. title = item.title.text
  26. print(title + '\n')
  27. # ------------------
  28. f.close()
  29. crawl()
  30. print("--- %s seconds ---" % (time.time() - start))