diff --git a/DiskUtils.py b/DiskUtils.py index 8c7727d..c754e0f 100644 --- a/DiskUtils.py +++ b/DiskUtils.py @@ -3,25 +3,54 @@ import time import re import fnmatch -def dowork(): +def check_download_exists_matching_url_version_one(url): - ################################################################################################ + print ("enter > downloads > exist check > version 1") - url = "https://sullygnome.com/channel/kaicenat/2023january/streams" + match = re.search(r"https://sullygnome.com/channel/([^/]+)/(\d+)([a-z]+)/games", url.lower()) - print("checking disk for the stuff...") + if not match: - print("url", url) + print(f"Invalid URL format. Could not extract channel name, year, or month.") - fileExists = check_download_exists_matching_url(url) + return - print("file already exists / ",fileExists) + channel_name, year, month = match.groups() - ################################################################################################ + print("channel / ",channel_name) -def check_download_exists_matching_url(url): + #print("year / ",year) - print ("hello?") + #print("month / ",month) + + #file_pattern = f"{channel_name}*{month}*{year}*.csv" + + ## this is a problem. it works but if there 2 channels. + ## redbull and redbull2 this is throw false positives. ugh + + file_pattern = f"*{channel_name} - game stats on Twitch in {month} {year} - SullyGnome.csv".lower() + + for filename in os.listdir("/home/yankee/Downloads/"): + + updated = filename.lower() + + #print("file in downloads / ",updated) + + if fnmatch.fnmatch(updated, file_pattern): + + print(f"Found matching file: {updated}") + + return True + + else: + + print(f"No matching CSV file found for channel '{channel_name}' in year '{year}' and month '{month}'. (version one)") + + return False + +def check_download_exists_matching_url_version_two(url): + + print ("enter > downloads > exist check > version 2") match = re.search(r"https://sullygnome.com/channel/([^/]+)/(\d+)([a-z]+)/games", url.lower()) @@ -39,12 +68,14 @@ def check_download_exists_matching_url(url): print("month / ",month) - #file_pattern = f"{channel_name}*{month}*{year}*.csv" - ## this is a problem. it works but if there 2 channels. ## redbull and redbull2 this is throw false positives. ugh - file_pattern = f"*{channel_name} - game stats on Twitch in {month} {year} - SullyGnome.csv".lower() + unicode_part = r"[\u3040-\u309F\u30A0-\u30FF_]*" # Matches any hiragana, katakana, and underscore + + file_pattern = f"{unicode_part} \({channel_name}\) - game stats on Twitch in {month} {year} - SullyGnome.csv".lower() + + print("regex :",file_pattern) for filename in os.listdir("/home/yankee/Downloads/"): @@ -60,10 +91,7 @@ def check_download_exists_matching_url(url): else: - print(f"No matching CSV file found for channel '{channel_name}' in year '{year}' and month '{month}'.") + print(f"No matching CSV file found for channel '{channel_name}' in year '{year}' and month '{month}'. (version t55555wo)") return False -def shutDownRobot(): - - print("shutting down the robot now") diff --git a/SullyGnomeRobot.py b/SullyGnomeRobot.py index 8067077..34f355b 100644 --- a/SullyGnomeRobot.py +++ b/SullyGnomeRobot.py @@ -10,15 +10,13 @@ import RestAPIClient import DiskUtils from selenium.webdriver import FirefoxOptions -## https://sullygnome.com/channel/kaicenat/365/games" - def download(url): print(f"launch -> SullyGnomeRobot -> download() -> {url}") opts = FirefoxOptions() - opts.add_argument("--headless") +## opts.add_argument("--headless") driver=webdriver.Firefox(options=opts) @@ -28,7 +26,6 @@ def download(url): print("url / ", driver.current_url) - ########################################## print("stage / start / tblControl_length check") @@ -87,21 +84,19 @@ def download(url): print("file download started. now checking if exist before closing") + counter = 0 + + max_iterations = 5 + while True: - print("checking is file exists... [loop]") + print(f"Loop iteration: {counter}") - existsAlready = DiskUtils.check_download_exists_matching_url(url) + counter += 1 - print("file already exists / ",existsAlready) + if counter == max_iterations: - if existsAlready: - - print(f"The file matching the url '{url}' exists.") - - print("sleeping for 4 secs before shutting down robot") - - time.sleep(4) + print(f"Loop exited after {max_iterations} iterations.") print("shutting down the robot") @@ -109,6 +104,40 @@ def download(url): print("robot shut down. breaking. and dying.") + RestAPIClient.addDownloadFailure(url,"file not found on disk") + + print("stage / complete / database updated w/ FAILURE") + + break + + + + + + print("checking is file exists... [loop]") + + existsAlreadyVersionOne = DiskUtils.check_download_exists_matching_url_version_one(url) + + print("file already exists (v1) / ",existsAlreadyVersionOne) + + if existsAlreadyVersionOne: + + print(f"The file matching the url '{url}' DOES INDEED exists. (version 1)") + +## print("sleeping for 4 secs before shutting down robot") +## +## time.sleep(4) + + print("shutting down the robot") + + driver.quit() + + print("robot shut down. breaking. and dying.") + + RestAPIClient.addDownloadSuccess(url) + + print("stage / complete / database updated w/ success") + break else: @@ -123,8 +152,6 @@ def download(url): ########################################## - RestAPIClient.addDownloadSuccess(url) - print("stage / complete / database updated w/ success") print(f"complete -> SullyGnomeRobot -> download() -> {url}") diff --git a/consumer.py b/consumer.py index f029a53..c9287d3 100644 --- a/consumer.py +++ b/consumer.py @@ -7,6 +7,8 @@ print("## ") print("## starting > JMS consumer (yankee-sully-channels-monthly)") print("## ") +## https://sullygnome.com/channel/yaritaiji/2024october/games + class MyListener(stomp.ConnectionListener): def on_message(headers, message): diff --git a/crontabX b/recycle/crontabX2 similarity index 100% rename from crontabX rename to recycle/crontabX2 diff --git a/ping.bash b/recycle/ping.bash similarity index 100% rename from ping.bash rename to recycle/ping.bash diff --git a/smart-cron-builder.bash b/recycle/smart-cron-builder.bash similarity index 100% rename from smart-cron-builder.bash rename to recycle/smart-cron-builder.bash diff --git a/smart-cron-builder.py b/recycle/smart-cron-builder.py similarity index 100% rename from smart-cron-builder.py rename to recycle/smart-cron-builder.py