It keeps showing me an error page.
These are the two websites:
(https://secure.ethicspoint.com/domain/en/default_reporter.asp)
(https://app.convercent.com/en-us/Anonymous/IssueIntake/IdentifyOrganization)
I’m attempting to use both static scraping with requests and dynamic scraping with Selenium, but I keep running into an issue where I either get an error page or I can’t extract the necessary elements (like dropdowns with company names).
This is the code I used
import time

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
from webdriver_manager.chrome import ChromeDriverManager
# One shared Chrome session for the whole script (webdriver-manager downloads
# a matching chromedriver); dynamic_scrape() reuses it and the script calls
# driver.quit() at the end.
driver = webdriver.Chrome(service=ChromeService(ChromeDriverManager().install()))
def static_scrape(url):
    """Fetch *url* with requests and return the text of its <option> elements.

    Returns a list of non-empty option strings (the company names), or
    ``None`` when the request fails, returns a non-200 status, or the page
    contains no <option> elements.
    """
    # Hotline portals commonly serve an error page to clients without a
    # browser-like User-Agent; the default "python-requests/x.y" UA is the
    # likely cause of the error page, so send a realistic one.
    headers = {
        "User-Agent": (
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
            "AppleWebKit/537.36 (KHTML, like Gecko) "
            "Chrome/120.0 Safari/537.36"
        )
    }
    try:
        # timeout keeps the script from hanging forever on an unresponsive host
        response = requests.get(url, headers=headers, timeout=15)
        if response.status_code == 200:
            soup = BeautifulSoup(response.text, 'html.parser')
            options = soup.find_all('option')
            if options:
                companies = [option.text.strip() for option in options if option.text.strip()]
                return companies
            print("No company names found in static content.")
            return None
        print(f"Failed to retrieve webpage. Status code: {response.status_code}")
        return None
    except Exception as e:
        # Best-effort: report and return None so the caller can fall back
        # to dynamic (Selenium) scraping.
        print(f"Error in static scraping: {e}")
        return None
def dynamic_scrape(url):
    """Load *url* in the shared Selenium ``driver`` and scrape the first
    <select> element's option texts.

    Returns a list of non-empty option strings, or ``None`` on any failure
    (timeout waiting for the dropdown, missing element, WebDriver error).
    """
    try:
        driver.get(url)
        # Wait (up to 20 s) for a <select> to actually be present instead of
        # a fixed sleep — JS-rendered dropdowns appear at variable speed, and
        # a hard-coded 5 s sleep either wastes time or fires too early.
        dropdown = WebDriverWait(driver, 20).until(
            EC.presence_of_element_located((By.TAG_NAME, 'select'))
        )
        options = dropdown.find_elements(By.TAG_NAME, 'option')
        companies = [option.text.strip() for option in options if option.text.strip()]
        return companies
    except Exception as e:
        # Best-effort: report and return None so the caller can report failure.
        print(f"Error in dynamic scraping: {e}")
        return None
def scrape_companies(url):
    """Scrape company names from *url*: try static requests scraping first,
    falling back to Selenium-based dynamic scraping when that yields nothing.
    """
    print(f"Attempting static scraping for {url}...")
    names = static_scrape(url)
    if names is not None:
        return names
    print(f"Static scraping failed, attempting dynamic scraping for {url}...")
    return dynamic_scrape(url)
urls = [
    'https://secure.ethicspoint.com/domain/en/default_reporter.asp',
    'https://app.convercent.com/en-us/Anonymous/IssueIntake/IdentifyOrganization'
]

try:
    for url in urls:
        companies = scrape_companies(url)
        if companies:
            print(f"\nCompanies found on {url}:")
            for company in companies:
                print(f"- {company}")
        else:
            print(f"No companies found on {url}.\n")
finally:
    # Always close the browser, even if a scrape raises, so no orphaned
    # Chrome/chromedriver processes are left running.
    driver.quit()
You need to sign in to view these answers.