OiO.lk Community platform!

Oio.lk is an excellent forum for developers, providing a wide range of resources, discussions, and support for those in the developer community. Join oio.lk today to connect with like-minded professionals, share insights, and stay updated on the latest trends and technologies in the development field.
  You need to log in or register to access the solved answers to this problem.
  • You have reached the maximum number of guest views allowed
  • Please register below to remove this limitation

python selenium Headless chrome doesn't scroll

  • Thread starter: silverFoxA
  • Start date
S

silverFoxA

Guest
I'm working on a web scraper to collect Facebook post comments for analytics purposes.

On Facebook, after login, we can scroll the post page to get all the comments, which are loaded dynamically as the page scrolls. Unfortunately, I can't get the page to scroll in headless mode, though it works in non-headless mode.

I have referred to the following posts - Post 1, Post 2

Here's my code

Code:
import datetime
import re
import time

import yake
from decouple import config
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

# User-agent string handed to Chrome through the --user-agent switch below.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"

# Chrome command-line switches, added to the options object in this order.
_chrome_flags = (
    '--disable-gpu-sandbox',
    '--disable-gpu',
    '--disable-software-rasterizer',
    '--disable-dev-shm-usage',
    '--no-sandbox',
    "--window-size=1280,700",
    "--headless=new",
    f"--user-agent={user_agent}",
)

options = Options()
for _flag in _chrome_flags:
    options.add_argument(_flag)

# Start the browser session that every later step drives.
driver = webdriver.Chrome(options=options)

# Open the Facebook landing page and wait up to 10 seconds for the e-mail
# field to be present in the DOM before touching the form.
driver.get("https://www.facebook.com/")
login_wait = WebDriverWait(driver, 10)
email_input = login_wait.until(EC.presence_of_element_located((By.ID, "email")))
password_input = driver.find_element(By.ID, "pass")

# Credentials are read through decouple's config() rather than hard-coded.
email_input.send_keys(config("FB_EMAIL_INPUT"))
password_input.send_keys(config("FB_PASSWORD_INPUT"))
# Send RETURN from the password field to submit the form.
password_input.send_keys(Keys.RETURN)

# Short pause after submitting the form before polling for the logged-in page.
time.sleep(1)
try:
    # until() polls for up to 10 seconds. When the element never appears it
    # raises TimeoutException, so that is the exception that signals a failed
    # login; NoSuchElementException is kept for backward compatibility.
    profile = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//div[@aria-label='Your profile']"))
    )
except (NoSuchElementException, TimeoutException):
    print("Login failed")
else:
    print("Login successful")


# URL of the Facebook post to open; it is loaded in the same browser session
# that was used for the login attempt above.
POST_URL = "https://www.facebook.com/thebetterindia/posts/pfbid025Yo2f5Qsd8NDL4AoFoHuvjeAURiRVc7rQ4uZBbULMuUWCfZ9NURRfeVha7aPpnn3l"
driver.get(POST_URL)

def infinite_scroll(driver, timeout=10, max_scrolls=None):
    """Scroll the page to the bottom repeatedly until it stops growing.

    After every scroll the function sleeps, then compares
    ``document.body.scrollHeight`` with the value seen before the scroll.
    It stops as soon as the height is unchanged.

    Args:
        driver: Object exposing ``execute_script(script)``, such as a
            Selenium WebDriver.
        timeout: Seconds to sleep after each scroll so newly loaded content
            can extend the page. Despite the name this is a pause, not a
            deadline.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps scrolling until the height stops changing,
            which never ends on a page that grows without limit.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls = 0

    while max_scrolls is None or scrolls < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls += 1
        time.sleep(timeout)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Nothing new was appended since the previous scroll.
            break

        last_height = new_height

# Scroll the opened post with a 2-second pause between scrolls. Any error is
# printed instead of being propagated.
try:
    infinite_scroll(driver, timeout=2)
except Exception as exc:
    print(f"An exception occurred: {exc}")
<p>I'm working on a web scraper to collect Facebook post comments for analytics purposes.</p>
<p>On Facebook, after login, we can scroll the post page to get all the comments, which are loaded dynamically as the page scrolls. Unfortunately, I can't get the page to scroll in <code>headless</code> mode, though it works in non-headless mode.</p>
<p>I have referred to the following posts - <a href="https://stackoverflow.com/questions...ium-can-only-find-ways-to-scroll-non-headless">Post 1</a>, <a href=" " rel="nofollow noreferrer">Post 2</a></p>
<p>Here's my code</p>
<pre class="lang-py prettyprint-override"><code>import datetime
import re
import time

from decouple import config
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.wait import WebDriverWait
import yake
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException

# User-agent string handed to Chrome through the --user-agent switch below.
user_agent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4515.159 Safari/537.36"

# Chrome command-line switches, added to the options object in this order.
_chrome_flags = (
    '--disable-gpu-sandbox',
    '--disable-gpu',
    '--disable-software-rasterizer',
    '--disable-dev-shm-usage',
    '--no-sandbox',
    "--window-size=1280,700",
    "--headless=new",
    f"--user-agent={user_agent}",
)

options = Options()
for _flag in _chrome_flags:
    options.add_argument(_flag)

# Start the browser session that every later step drives.
driver = webdriver.Chrome(options=options)

# Open the Facebook landing page and wait up to 10 seconds for the e-mail
# field to be present in the DOM before touching the form.
driver.get("https://www.facebook.com/")
login_wait = WebDriverWait(driver, 10)
email_input = login_wait.until(EC.presence_of_element_located((By.ID, "email")))
password_input = driver.find_element(By.ID, "pass")

# Credentials are read through decouple's config() rather than hard-coded.
email_input.send_keys(config("FB_EMAIL_INPUT"))
password_input.send_keys(config("FB_PASSWORD_INPUT"))
# Send RETURN from the password field to submit the form.
password_input.send_keys(Keys.RETURN)

# WebDriverWait.until() signals an expired wait with TimeoutException, which
# the import list of this listing does not provide, so it is imported here.
from selenium.common.exceptions import TimeoutException

# Short pause after submitting the form before polling for the logged-in page.
time.sleep(1)
try:
    # until() polls for up to 10 seconds. When the element never appears it
    # raises TimeoutException, so that is the exception that signals a failed
    # login; NoSuchElementException is kept for backward compatibility.
    profile = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, "//div[@aria-label='Your profile']"))
    )
except (NoSuchElementException, TimeoutException):
    print("Login failed")
else:
    print("Login successful")


# URL of the Facebook post to open. The address had been blanked out in this
# copy of the listing; a blank string is not a navigable URL, so the address
# from the plain-text listing of the same script is used.
POST_URL = "https://www.facebook.com/thebetterindia/posts/pfbid025Yo2f5Qsd8NDL4AoFoHuvjeAURiRVc7rQ4uZBbULMuUWCfZ9NURRfeVha7aPpnn3l"
driver.get(POST_URL)

def infinite_scroll(driver, timeout=10, max_scrolls=None):
    """Scroll the page to the bottom repeatedly until it stops growing.

    After every scroll the function sleeps, then compares
    ``document.body.scrollHeight`` with the value seen before the scroll.
    It stops as soon as the height is unchanged.

    Args:
        driver: Object exposing ``execute_script(script)``, such as a
            Selenium WebDriver.
        timeout: Seconds to sleep after each scroll so newly loaded content
            can extend the page. Despite the name this is a pause, not a
            deadline.
        max_scrolls: Optional upper bound on the number of scrolls. ``None``
            (the default) keeps scrolling until the height stops changing,
            which never ends on a page that grows without limit.

    Returns:
        None.
    """
    last_height = driver.execute_script("return document.body.scrollHeight")
    scrolls = 0

    while max_scrolls is None or scrolls < max_scrolls:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        scrolls += 1
        time.sleep(timeout)

        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            # Nothing new was appended since the previous scroll.
            break

        last_height = new_height

# Scroll the opened post with a 2-second pause between scrolls. Any error is
# printed instead of being propagated.
try:
    infinite_scroll(driver, timeout=2)
except Exception as e:
    print(f"An exception occurred: {e}")

</code></pre>
 

Latest posts

I
Replies
0
Views
1
impact christian
I
Top