From 3bea3bae59e7404b286b5bf97a6270270bfadd6c Mon Sep 17 00:00:00 2001 From: Tom Ritter Date: Sun, 31 Jan 2016 13:22:08 -0600 Subject: Refactor lots of things to allow you to be notified every so often, instead of every single time. --- README.md | 23 ++++++------ jobmanager.py | 55 +++++++++++++++++++++++++--- jobs/EmailChecker.py | 14 ++++---- jobs/HTTPServerChecker.py | 24 ++++++++----- jobs/JobBase.py | 85 +++++++++++++++++++++++++++++++++++++++++--- jobs/JobSpawner.py | 4 ++- jobs/PeerChecker.py | 61 ++++++++++++++++++------------- jobs/TCPServerChecker.py | 22 ++++++++---- jobs/__init__.py | 4 +-- jobstate.py | 61 +++++++++++++++++++++++++++++++ samplejobs/BWAuthChecker.py | 7 +++- samplejobs/MetricsChecker.py | 7 +++- settings.cfg.example | 1 + 13 files changed, 296 insertions(+), 72 deletions(-) create mode 100644 jobstate.py diff --git a/README.md b/README.md index 615af3d..9f3de2c 100644 --- a/README.md +++ b/README.md @@ -29,21 +29,22 @@ This wouldn't be any good if you couldn't specify your own custom jobs. There ar ### Inherit JobBase -JobBase is the base for a job, and should be used when you have a single, custom job you want to run. Your job needs to match the name of the file it is in. You should override two functions: +JobBase is the base for a job, and should be used when you have a single, custom job you want to run. Your job needs to match the name of the file it is in. You should override three (or four) functions: * executeEvery * This should return a JobFrequency constant indicating how often you want the job to run +* notifyOnFailureEvery + * This should return a JobFailureNotificationFrequency constant indicating how often you want to be notified about a (continually) failing job * execute - * This does the work - * It should _not_ return False if it fails, instead it should return False _only if it cannot send email_. If this function returns false, checker assumes it cannot send mail. - -An appropriate way to end the function would be: - - if not success: - return self.sendEmail("Failed executing bob-job", failureMessage) - else: - return True - + * This does the work. It returns True if the job succeeded or False if it didn't +* onFailure + * This is called if the job failed _and_ the admin should be notified. You should return if the email could be sent or not. + * E.G. return self.sendEmail("Job Failed", self.details_of_error) +* onStateChangeSuccess + * This is called if a) you specified JobFailureNotificationFrequency.ONSTATECHANGE in notifyOnFailureEvery + * A job was previously failing but just suceeded + * Like onFailure, it should return if the email could be sent or not. + * E.G. return self.sendEmail("Job Suceeded", "") ### Inherit JobSpawner diff --git a/jobmanager.py b/jobmanager.py index 63ae16b..fa55883 100755 --- a/jobmanager.py +++ b/jobmanager.py @@ -1,30 +1,75 @@ #!/usr/bin/env python +import os import time import logging +import datetime import requests -from jobs import JobFinder +from jobs import JobFinder, JobBase +from jobstate import JobState class JobManager: def __init__(self, config): jobsFinder = JobFinder(config) self.jobs = jobsFinder.get_jobs() self.config = config + self.statefile = config.get('general', 'statefile') + self._load_state() + + def _load_state(self): + self.state = {} + if not os.path.isfile(self.statefile): + logging.warn("Could not find statefile at " + self.statefile + "; creating a new one.") + else: + f = open(self.statefile, "r") + lines = f.readlines() + for l in lines: + s = JobState.Parse(l) + self.state[s.name] = s + f.close() + + def _save_state(self): + f = open(self.statefile, "w") + for i in self.state: + f.write(self.state[i].serialize()) + f.close() def list_jobs(self): return self.jobs def execute_jobs(self, cronmode): logging.info("Executing jobs...") - success = True + emailWorks = True for thisJob in self.jobs: - thisJob.setConfig(self.config) if thisJob.shouldExecute(cronmode): + try: + lastRunStatus = self.state[thisJob.getStateName()] + except: + logging.warn("No state was found for " + thisJob.getStateName() + \ + "\nMaking up a dummy state for it.") + lastRunStatus = self.state[thisJob.getStateName()] = JobState.Empty(thisJob.getStateName()) + logging.info("Executing " + thisJob.getName()) if not thisJob.execute(): - success = False - return success + #Unsuccessful run + logging.info("Execution of " + thisJob.getName() + " failed") + if thisJob.shouldNotifyFailure(lastRunStatus): + lastRunStatus.markFailedAndNotify() + logging.info("Notifying of failure for " + thisJob.getName()) + if not thisJob.onFailure(): + emailWorks = False + else: + lastRunStatus.markFailedNoNotify() + else: + #Successful Run + logging.info("Execution of " + thisJob.getName() + " succeeded") + if lastRunStatus.CurrentStateSuccess == False and thisJob.notifyOnFailureEvery() == JobBase.FailureNotificationFrequency.ONSTATECHANGE: + if not thisJob.onStateChangeSuccess(): + emailWorks = False + lastRunStatus.markSuccessful() + self._save_state() + return emailWorks def mark_jobs_ran(self): logging.debug("Marking jobs as run successfully.") diff --git a/jobs/EmailChecker.py b/jobs/EmailChecker.py index 925e0db..939d5b8 100755 --- a/jobs/EmailChecker.py +++ b/jobs/EmailChecker.py @@ -12,6 +12,8 @@ import JobBase class EmailChecker(JobBase.JobBase): def executeEvery(self): return JobBase.JobFrequency.HOUR + def notifyOnFailureEvery(self): + return JobBase.JobFailureNotificationFrequency.EVERYTIME def execute(self): USER = self.config.get('email', 'user') PASS = self.config.get('email', 'pass') @@ -47,9 +49,9 @@ class EmailChecker(JobBase.JobBase): foundSubject = True M.close() M.logout() - if not foundSubject: - #This may not work, but try anyway - self.sendEmail("Email Fetch Failure", logdetails) - return False - else: - return True + + self.logdetails = logdetails + return foundSubject + def onFailure(self): + return self.sendEmail("Email Fetch Failure", self.logdetails) + diff --git a/jobs/HTTPServerChecker.py b/jobs/HTTPServerChecker.py index ec8eda1..ec2a9d6 100755 --- a/jobs/HTTPServerChecker.py +++ b/jobs/HTTPServerChecker.py @@ -8,29 +8,37 @@ import JobSpawner class HTTPServerChecker(JobSpawner.JobSpawner): servers = [ - #("http://example.com", JobBase.JobFrequency.MINUTE), - #("https://exampletwo.com", JobBase.JobFrequency.MINUTE) + #("http://example.com", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME), + #("https://exampletwo.com", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME) ] class ServerChecker(JobBase.JobBase): - def __init__(self, url, frequency): + def __init__(self, config, url, frequency, failureNotificationFrequency): + self.config = config self.url = url self.frequency = frequency + self.failureNotificationFrequency = failureNotificationFrequency def getName(self): return str(self.__class__) + " for " + self.url def executeEvery(self): return self.frequency + def notifyOnFailureEvery(self): + return self.failureNotificationFrequency def execute(self): try: requests.get(self.url) return True except: - msg = "Could not hit server " + self.url - logging.warn(msg) - return self.sendEmail(msg, "") + self.failuremsg = "Could not hit server " + self.url + logging.warn(self.failuremsg) + return False + def onFailure(self): + return self.sendEmail(self.failuremsg, "") + def onStateChangeSuccess(self): + return self.sendEmail("Successfully hit " + self.url, "") - def get_sub_jobs(self): + def get_sub_jobs(self, config): for s in self.servers: - yield self.ServerChecker(s[0], s[1]) + yield self.ServerChecker(config, s[0], s[1], s[2]) diff --git a/jobs/JobBase.py b/jobs/JobBase.py index a7f02c9..29ca443 100755 --- a/jobs/JobBase.py +++ b/jobs/JobBase.py @@ -1,7 +1,9 @@ #!/usr/bin/env python +import time import random import logging +import datetime import smtplib @@ -11,27 +13,100 @@ class JobFrequency: DAY = "day" DAY_NOON = "day_noon" +class JobFailureNotificationFrequency: + EVERYTIME = "every" + EVERYFIVEMINUTES = "5min" + EVERYTENMINUTES = "10min" + EVERYHOUR = "hour" + ONSTATECHANGE = "state_change" + class JobBase: - def __init__(self): - self.config = None + def __init__(self, config): + self.config = config + + """ Return a friendly name to identify this Job""" def getName(self): return str(self.__class__) + + """Return a non-friendly, guarenteed-unique name to identify this Job + Needed to keep track of the job's run history. + Takes into account the contructor arguments to uniquely identify JobSpawner-jobs""" + def getStateName(self): + return self.getName() + + """Returns True if the job should execute this cron-run""" def shouldExecute(self, cronmode): frequency = self.executeEvery() if cronmode == frequency: return True return False - def setConfig(self, config): - self.config = config - + + """Returns True if the jobmanager should call 'onFailure' to alert the admin""" + def shouldNotifyFailure(self, jobState): + notifyFrequency = self.notifyOnFailureEvery() + if notifyFrequency == JobFailureNotificationFrequency.EVERYTIME: + return True + elif notifyFrequency == JobFailureNotificationFrequency.EVERYFIVEMINUTES: + now = time.time() + lastNotify = jobState.LastNotifyTime + if now - lastNotify > datetime.timedelta(minutes=4, seconds=30): + return True + return False + elif notifyFrequency == JobFailureNotificationFrequency.EVERYTENMINUTES: + now = time.time() + lastNotify = jobState.LastNotifyTime + if now - lastNotify > datetime.timedelta(minutes=9, seconds=15): + return True + return False + elif notifyFrequency == JobFailureNotificationFrequency.EVERYHOUR: + now = time.time() + lastNotify = jobState.LastNotifyTime + if now - lastNotify > datetime.timedelta(minutes=59, seconds=0): + return True + return False + elif notifyFrequency == JobFailureNotificationFrequency.ONSTATECHANGE: + #Only notify if the last JobState was a Success + return jobState.CurrentStateSuccess + return True + + """Helper method to send email""" def sendEmail(self, subject, body, to=""): return sendEmail(self.config, subject, body, to) + + """OVERRIDE ME + Returns a JobFrequency indicating how often the job should be run.""" def executeEvery(self): pass + + """OVERRIDE ME + Returns a JobFailureNotificationFrequency indicating how often a failure + notification email should be sent""" + def notifyOnFailureEvery(self): + pass + + """OVERRIDE ME + Executes the job's actions, and returns true to indicate the job succeeded.""" def execute(self): pass + """OVERRIDE ME + Notify the admin the job failed. Returns True if the email could be + successfully sent. + Example: return self.sendEmail(self.subject, self.body, self.notificationAddress)""" + def onFailure(self): + pass + + """OVERRIDE ME + Notify the admin the job succeeded (when it was previously failing). Only used for + JobFailureNotificationFrequency.ONSTATECHANGE + + Returns True if the email could be successfully sent. + Example: return self.sendEmail(self.subject, self.body, self.notificationAddress)""" + def onStateChangeSuccess(self): + log.warn(self.getName() + " did not override onStateChangeSuccess") + return True + def sendEmail(config, subject, body, to=""): FROM = config.get('email', 'user') PASS = config.get('email', 'pass') diff --git a/jobs/JobSpawner.py b/jobs/JobSpawner.py index 3d09693..50f3043 100755 --- a/jobs/JobSpawner.py +++ b/jobs/JobSpawner.py @@ -1,5 +1,7 @@ #!/usr/bin/env python class JobSpawner: - def get_sub_jobs(self): + """OVERRIDE ME + Returns an array (or using 'yield') of Job objects to run""" + def get_sub_jobs(self, config): pass diff --git a/jobs/PeerChecker.py b/jobs/PeerChecker.py index f17da53..8211472 100755 --- a/jobs/PeerChecker.py +++ b/jobs/PeerChecker.py @@ -9,46 +9,57 @@ import requests import JobBase -class PeerChecker(JobBase.JobBase): - def executeEvery(self): - return JobBase.JobFrequency.HOUR - def execute(self): - testSuccess = True - peers = self.config.items('peers') - for p in peers: - peer = p[1].split(',') +class PeerChecker(JobSpawner.JobSpawner): + class IndividualPeerChecker(JobBase.JobBase): + def __init__(self, config, checkurl, notificationAddress): + self.checkurl = checkurl + self.notificationAddress = notificationAddress + + def executeEvery(self): + return JobBase.JobFrequency.HOUR + def notifyOnFailureEvery(self): + return JobBase.JobFailureNotificationFrequency.EVERYTIME + def execute(self): peerOK = False - subject = "" - body = "" + self.subject = "" + self.body = "" try: - response = requests.get(peer[0]) + response = requests.get(self.checkurl) if response.status_code != 200: peerOK = False - subject = peer[0] + " returned a non-standard status code." - body = str(response.status_code) + "\n" + response.content + self.subject = self.checkurl + " returned a non-standard status code." + self.body = str(response.status_code) + "\n" + response.content else: if "True" in response.content: peerOK = True elif "MailProblem" in response.content: peerOK = False - subject = peer[0] + " reports it cannot send email." - body = str(response.status_code) + "\n" + response.content + self.subject = self.checkurl + " reports it cannot send email." + self.body = str(response.status_code) + "\n" + response.content elif "JobProblem" in response.content: peerOK = False - subject = peer[0] + " reports its jobs are not running." - body = str(response.status_code) + "\n" + response.content + self.subject = self.checkurl + " reports its jobs are not running." + self.body = str(response.status_code) + "\n" + response.content else: peerOK = False - subject = peer[0] + " had an unexpected response." - body = str(response.status_code) + "\n" + response.content + self.subject = self.checkurl + " had an unexpected response." + self.body = str(response.status_code) + "\n" + response.content except Exception as e: peerOK = False - subject = peer[0] + " is not responding." - body = str(e) + self.subject = self.checkurl + " is not responding." + self.body = str(e) + return peerOK - if not peerOK: - if not self.sendEmail(subject, body, peer[1]): - testSuccess = False - return testSuccess + return peerOK: + def onFailure(self): + return self.sendEmail(self.subject, self.body, self.notificationAddress) + def onStateChangeSuccess(self): + return self.sendEmail("Successfully hit " + self.checkurl, "", self.notificationAddress) + + def get_sub_jobs(self, config): + peers = config.items('peers') + for p in peers: + (address, email) = p[1].split(',') + yield self.IndividualPeerChecker(config, address, email) diff --git a/jobs/TCPServerChecker.py b/jobs/TCPServerChecker.py index 711047b..642e188 100755 --- a/jobs/TCPServerChecker.py +++ b/jobs/TCPServerChecker.py @@ -9,20 +9,24 @@ import JobSpawner class TCPServerChecker(JobSpawner.JobSpawner): servers = [ - #("example.com", 53, "example.com:tcpdns", JobBase.JobFrequency.MINUTE), + #("example.com", 53, "example.com:tcpdns", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME) ] class ServerChecker(JobBase.JobBase): - def __init__(self, ip, port, friendlyName, frequency): + def __init__(self, config, ip, port, friendlyName, frequency, failureNotificationFrequency): + self.config = config self.ip = ip self.port = port self.friendlyName = friendlyName + "(" + self.ip + ":" + str(self.port) + ")" self.frequency = frequency + self.failureNotificationFrequency = failureNotificationFrequency def getName(self): return str(self.__class__) + " for " + self.friendlyName def executeEvery(self): return self.frequency + def notifyOnFailureEvery(self): + return self.failureNotificationFrequency def execute(self): try: s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) @@ -30,12 +34,16 @@ class TCPServerChecker(JobSpawner.JobSpawner): s.close() return True except: - msg = "Could not hit server " + self.friendlyName - logging.warn(msg) - return self.sendEmail(msg, "") + self.failuremsg = "Could not hit server " + self.friendlyName + logging.warn(self.failuremsg) + return False + def onFailure(self): + return self.sendEmail(self.failuremsg, "") + def onStateChangeSuccess(self): + return self.sendEmail("Successfully hit " + self.friendlyName, "") - def get_sub_jobs(self): + def get_sub_jobs(self, config): for s in self.servers: - yield self.ServerChecker(s[0], s[1], s[2], s[3]) + yield self.ServerChecker(config, s[0], s[1], s[2], s[3], s[4]) diff --git a/jobs/__init__.py b/jobs/__init__.py index 9955164..0e780eb 100755 --- a/jobs/__init__.py +++ b/jobs/__init__.py @@ -44,10 +44,10 @@ class JobFinder: # It has to do with JobBase being imported multiple times (within jobs) or something if base.__name__ == 'JobBase': # A job was found, keep it - self._jobs.add(obj()) + self._jobs.add(obj(self.config)) elif base.__name__ == 'JobSpawner': spawner = obj() - for j in spawner.get_sub_jobs(): + for j in spawner.get_sub_jobs(self.config): self._jobs.add(j) diff --git a/jobstate.py b/jobstate.py new file mode 100644 index 0000000..4df0de0 --- /dev/null +++ b/jobstate.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +import time +import logging +import datetime + +class JobState: + def __init__(self, name): + self.name = name + self.CurrentStateSuccess = True + + def markFailedAndNotify(self): + if self.CurrentStateSuccess: + self.CurrentStateSuccess = False + self.FirstFailureTime = time.time() + self.LastNotifyTime = self.FirstFailureTime + else: + self.LastNotifyTime = time.time() + + def markFailedNoNotify(self): + if self.CurrentStateSuccess: + logging.warn("Somehow we called markFailedNoNotify, on a success condition, without notifying the user") + self.CurrentStateSuccess = False + self.FirstFailureTime = time.time() + self.LastNotifyTime = 0 + else: + pass + + def markSuccessful(self): + if self.CurrentStateSuccess: + pass + else: + self.CurrentStateSuccess = True + self.FirstFailureTime = 0 + self.LastNotifyTime = 0 + + def serialize(self): + ret = self.name + "|" + ret += "Succeeding" if self.CurrentStateSuccess else "Failing" + ret += "|" + str(self.FirstFailureTime) + ret += "|" + str(self.LastNotifyTime) + return ret + + @staticmethod + def Parse(line): + s = JobState() + + line = line.strip() + parts = line.split("|") + + s.name = parts[0] + s.CurrentStateSuccess = True if parts[1] == "Succeeding" else False + s.FirstFailureTime = float(parts[2]) + s.LastNotifyTime = float(parts[3]) + + return s + + @staticmethod + def Empty(name): + s = JobState(name) + return s \ No newline at end of file diff --git a/samplejobs/BWAuthChecker.py b/samplejobs/BWAuthChecker.py index 9116857..2b88e95 100755 --- a/samplejobs/BWAuthChecker.py +++ b/samplejobs/BWAuthChecker.py @@ -13,6 +13,8 @@ import JobBase class BWAuthChecker(JobBase.JobBase): def executeEvery(self): return JobBase.JobFrequency.HOUR + def notifyOnFailureEvery(self): + return JobBase.JobFailureNotificationFrequency.EVERYTIME def execute(self): body = "" url = "https://example.com/bwauth/bwscan.V3BandwidthsFile" @@ -34,6 +36,9 @@ class BWAuthChecker(JobBase.JobBase): if body: logging.warn("tor bwauth is broken?") logging.warn(body) - return self.sendEmail("tor bwauth is broken?", body) + self.logdetails = body + return False else: return True + def onFailure(self): + return self.sendEmail("tor bwauth is broken?", self.logdetails) diff --git a/samplejobs/MetricsChecker.py b/samplejobs/MetricsChecker.py index d46c7d3..1d29a81 100755 --- a/samplejobs/MetricsChecker.py +++ b/samplejobs/MetricsChecker.py @@ -12,6 +12,8 @@ import JobBase class MetricsChecker(JobBase.JobBase): def executeEvery(self): return JobBase.JobFrequency.DAY_NOON + def notifyOnFailureEvery(self): + return JobBase.JobFailureNotificationFrequency.EVERYTIME def execute(self): body = "" ys = datetime.date.today() - datetime.timedelta(hours=24) @@ -27,6 +29,9 @@ class MetricsChecker(JobBase.JobBase): if body: logging.warn("tor metrics is broken?") logging.warn(body) - return self.sendEmail("tor metrics is broken?", body) + self.logdetails = body + return False else: return True + def onFailure(self): + return self.sendEmail("tor metrics is broken?", self.logdetails) diff --git a/settings.cfg.example b/settings.cfg.example index 3ce86f2..ffac31e 100755 --- a/settings.cfg.example +++ b/settings.cfg.example @@ -1,6 +1,7 @@ [general] servername=uniqueservername alertcontact=youremail@example.com +statefile=jobrunhistory.db [email] user=agmailaccountyoucreate@gmail.com -- cgit v1.2.3