#!/usr/bin/python
# -*- coding: utf8 -*-
"""Fetch one toquoc.vn article and print it as a ``news`` dictionary.

The script downloads the page at ``link``, extracts the headline, the
summary and the body paragraphs, and prints a dict with the keys
``title``, ``description``, ``content``, ``category`` and ``date``
(the last two are always empty strings).
"""

import json
import time
from datetime import datetime
from datetime import timedelta
from sys import platform

# NOTE: wildcard import kept for compatibility with the rest of the project;
# it is listed first so the explicit imports below win any name collision.
from googlesearch import *
from bs4 import BeautifulSoup
from pygologin.gologin import GoLogin
import requests
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from underthesea import ner

# ---------------------------------------------------------------------------
# Disabled experiment: open a page through a GoLogin browser profile driven
# by Selenium. Kept for reference only -- none of it is executed.
#
# SECURITY: a GoLogin API token used to be hard-coded here. It is replaced by
# an environment lookup; treat the old token as leaked and revoke it.
#
# TOKEN = os.environ["GOLOGIN_TOKEN"]
# link = "https://ipinfo.io/"
# gl = GoLogin({
#     "token": TOKEN,
#     "tmpdir": "/tmp/",
#     "local": True,
#     "credentials_enable_service": False,
# })
# profile_id = gl.create({
#     "name": "profile_1",
#     "os": "mac",
#     "proxyEnabled": True,
#     "navigator": {
#         "language": "en-US,en;q=0.9,he;q=0.8",
#         "userAgent": "MyUserAgent",
#         "resolution": "1024x768",
#         "platform": "darwin",
#     },
#     "proxy": {
#         "mode": "http",
#         "host": "139.99.237.62",
#         "port": "80",
#         "username": "",
#         "password": "",
#     },
# })
# gl = GoLogin({
#     "token": TOKEN,
#     "profile_id": profile_id,
# })
# chrome_driver_path = "/Users/nguyenductai/Downloads/chromedriver"
# debugger_address = gl.start()
# chrome_options = Options()
# chrome_options.add_experimental_option("debuggerAddress", debugger_address)
# driver = webdriver.Chrome(executable_path=chrome_driver_path, options=chrome_options)
# driver.get(link)
# gl.delete(profile_id)
# driver.close()
# print("end session!")
# ---------------------------------------------------------------------------

# Seconds to wait for the server before giving up; without a timeout
# ``requests.get`` can block forever on a stalled connection.
REQUEST_TIMEOUT = 30

link = "https://toquoc.vn/van-hoa-khong-co-su-cao-thap-nho-hay-lon-ma-chi-co-su-da-dang-net-dac-sac-tieu-bieu-can-duoc-ton-trong-ton-vinh-phat-huy-giu-gin-20221006225030042.htm"


def _last_text(soup, tag, attrs):
    """Return the text of the last element matching *tag*/*attrs*, or ""."""
    matches = soup.find_all(tag, attrs)
    return matches[-1].text if matches else ""


def scrape_article(url, timeout=REQUEST_TIMEOUT):
    """Download *url* and return the article as a ``news`` dictionary.

    Args:
        url: Address of the article page.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A dict with ``title`` (last ``h1.entry-title``), ``description``
        (last ``h2.sapo``), ``content`` (every ``<p>`` inside
        ``div[data-role=content]``, each followed by ``". "``), and empty
        ``category`` and ``date`` strings. Missing elements yield "".

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    # Hand the parser raw bytes so it detects the charset from the document
    # itself; ``response.text`` trusts the HTTP header and falls back to
    # ISO-8859-1 when none is sent, which garbles Vietnamese text.
    soup = BeautifulSoup(response.content, "lxml")

    paragraphs = (
        paragraph.text + ". "
        for body in soup.find_all("div", {"data-role": "content"})
        for paragraph in body.find_all("p")
    )
    return {
        "title": _last_text(soup, "h1", {"class": "entry-title"}),
        "description": _last_text(soup, "h2", {"class": "sapo"}),
        "content": "".join(paragraphs),
        "category": "",
        "date": "",
    }


news = scrape_article(link)
print(news)