Hello, fellow developers!
I’m currently working on a PHP project where I need to:
Extract all the URLs (especially links to downloadable files) from a webpage, for example https://samplefile.download/sample-mp3-files/. Display these URLs so that I can choose which files to download. Finally, programmatically download only the selected files.
<?php
/**
 * Fetch the body of a web page with cURL.
 *
 * Redirects are followed and the response body is returned as a string
 * instead of being written to output.
 *
 * @param string $url Address of the page to fetch.
 * @return string|false The response body, or false when the handle could not
 *                      be created or the transfer failed.
 */
function get_webpage_content($url) {
    $ch = curl_init($url);
    if ($ch === false) {
        // No handle was created, so there is nothing to configure or close.
        return false;
    }

    curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
    // NOTE(review): certificate and host-name verification are switched off,
    // which exposes the transfer to man-in-the-middle attacks. Kept to match
    // the existing behavior; re-enable both options outside of local testing.
    curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
    curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);

    // curl_exec() performs the transfer; with CURLOPT_RETURNTRANSFER set it
    // returns the body as a string, or false on failure.
    $content = curl_exec($ch);
    curl_close($ch);

    return $content;
}
/**
 * Collect the href value of every anchor tag found in an HTML string.
 *
 * Matching is case-insensitive and accepts single- or double-quoted
 * attribute values. Values are returned exactly as written in the markup,
 * in document order; relative links are not resolved and duplicates are kept.
 *
 * @param string $content HTML markup to scan.
 * @return array List of href values (empty when no anchor matches).
 */
function extract_urls($content) {
    $found = [];
    preg_match_all(
        '/<a\s+(?:[^>]*?\s+)?href=["\']([^"\']*)["\']/i',
        $content,
        $found
    );

    // Index 1 holds the first capture group: the text between the quotes.
    $hrefs = $found[1];
    return $hrefs;
}
/**
 * Download each URL into a directory, naming every file after the last
 * segment of the URL path, and print one status line per URL.
 *
 * A URL is reported as failed when no file name can be derived from its path
 * (for example "#anchor" or "mailto:" links), when the transfer fails, or
 * when the data cannot be written to disk. An empty response body is a valid
 * download and produces an empty file.
 *
 * @param iterable $urls           URLs to download.
 * @param string   $save_directory Existing directory that receives the files.
 * @return void
 */
function download_files($urls, $save_directory) {
    foreach ($urls as $url) {
        // parse_url() yields null (or false) when the URL has no path part.
        $path = parse_url($url, PHP_URL_PATH);
        $file_name = is_string($path) ? basename($path) : '';
        if ($file_name === '') {
            // Without a file name the target would be the directory itself.
            echo "Failed to download: $url\n";
            continue;
        }
        $file_path = $save_directory . DIRECTORY_SEPARATOR . $file_name;

        $ch = curl_init($url);
        if ($ch === false) {
            echo "Failed to download: $url\n";
            continue;
        }
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        // NOTE(review): certificate and host-name verification are switched
        // off, which exposes the transfer to man-in-the-middle attacks. Kept
        // to match the existing behavior; re-enable outside of local testing.
        curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, false);
        curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, false);
        $file_data = curl_exec($ch);
        curl_close($ch);

        // Compare strictly against false: an empty body or the string "0" is
        // a successful transfer, not an error. Only report success once the
        // data has actually been written.
        if ($file_data === false || file_put_contents($file_path, $file_data) === false) {
            echo "Failed to download: $url\n";
            continue;
        }

        echo "Downloaded: $file_name\n";
    }
}
// Page to scan for links, and the folder that receives the downloads.
$save_directory = 'downloads';
$url = "https://samplefile.download/sample-mp3-files/";

// Create the target folder (including missing parents) when it is absent.
is_dir($save_directory) || mkdir($save_directory, 0777, true);

// Fetch the page, pull out every anchor href, then download each of them.
$webpage_content = get_webpage_content($url);
$urls = extract_urls($webpage_content);

download_files($urls, $save_directory);
?>
You need to sign in to view these answers.