OiO.lk Blog HTML I tried web scraping on these sites but it won't work
HTML

I tried web scraping on these sites but it won't work


It keeps showing me this: the error page

These are the two websites:
(https://secure.ethicspoint.com/domain/en/default_reporter.asp)
(https://app.convercent.com/en-us/Anonymous/IssueIntake/IdentifyOrganization)

I’m attempting to use both static scraping with requests and dynamic scraping with Selenium, but I keep running into an issue where I either get an error page or I can’t extract the necessary elements (like dropdowns with company names).

This is the code I used:

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.service import Service as ChromeService
from webdriver_manager.chrome import ChromeDriverManager
from selenium.webdriver.common.by import By
import time

# Start the shared Chrome session used by dynamic_scrape(); webdriver_manager
# supplies the path of the chromedriver binary handed to the service.
_chrome_service = ChromeService(ChromeDriverManager().install())
driver = webdriver.Chrome(service=_chrome_service)

def static_scrape(url, timeout=10):
    """Fetch *url* with a plain HTTP GET and collect the text of every <option>.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, ``requests.get`` can block indefinitely.

    Returns:
        A list of the non-empty, whitespace-stripped option labels, or
        ``None`` when the request raises, the status code is not 200, or the
        page contains no ``<option>`` elements. A message is printed in each
        ``None`` case.
    """
    # Best-effort by design: any failure is reported and turned into None so
    # that scrape_companies() can fall back to the browser-based scraper.
    try:
        response = requests.get(url, timeout=timeout)

        if response.status_code != 200:
            print(f"Failed to retrieve webpage. Status code: {response.status_code}")
            return None

        soup = BeautifulSoup(response.text, 'html.parser')
        options = soup.find_all('option')

        if not options:
            print("No company names found in static content.")
            return None

        # Strip each label once, then drop the ones that end up empty.
        labels = [option.text.strip() for option in options]
        return [label for label in labels if label]
    except Exception as e:
        print(f"Error in static scraping: {e}")
        return None

def dynamic_scrape(url):
    """Open *url* in the module-level Selenium driver and read the first <select>.

    Args:
        url: Address of the page to load in the browser.

    Returns:
        A list of the non-empty, whitespace-stripped labels of the
        ``<option>`` elements inside the first ``<select>`` found on the
        page, or ``None`` if any step raises (the error is printed).
    """
    try:
        driver.get(url)

        # Fixed pause before querying the DOM — presumably to let
        # script-rendered content appear.
        time.sleep(5)

        dropdown = driver.find_element(By.TAG_NAME, 'select')
        options = dropdown.find_elements(By.TAG_NAME, 'option')

        # Every .text access is a separate WebDriver command, so read each
        # option's text exactly once and filter the stripped results after.
        labels = [option.text.strip() for option in options]
        return [label for label in labels if label]

    except Exception as e:
        print(f"Error in dynamic scraping: {e}")
        return None

def scrape_companies(url):
    """Return the option labels for *url*, trying the cheap scraper first.

    The page is first fetched via ``static_scrape``; only when that yields
    ``None`` is the browser-based ``dynamic_scrape`` attempted.

    Args:
        url: Address of the page to scrape.

    Returns:
        Whatever the successful scraper returned, or the result of
        ``dynamic_scrape`` (possibly ``None``) when the static attempt failed.
    """
    print(f"Attempting static scraping for {url}...")
    static_result = static_scrape(url)

    # Anything other than None (even an empty list) counts as an answer.
    if static_result is not None:
        return static_result

    print(f"Static scraping failed, attempting dynamic scraping for {url}...")
    return dynamic_scrape(url)


# Pages whose dropdown entries should be listed.
urls = [
    'https://secure.ethicspoint.com/domain/en/default_reporter.asp',
    'https://app.convercent.com/en-us/Anonymous/IssueIntake/IdentifyOrganization'
]


try:
    for url in urls:
        companies = scrape_companies(url)

        # None and an empty list are both reported as "nothing found".
        if companies:
            print(f"\nCompanies found on {url}:")
            for company in companies:
                print(f"- {company}")
        else:
            print(f"No companies found on {url}.\n")
finally:
    # Always shut the browser down, even if the loop is interrupted or
    # raises, so no Chrome/chromedriver process is left behind.
    driver.quit()



You need to sign in to view these answers

Exit mobile version