diff options
-rw-r--r-- | README.md | 23 | ||||
-rwxr-xr-x | jobmanager.py | 55 | ||||
-rwxr-xr-x | jobs/EmailChecker.py | 14 | ||||
-rwxr-xr-x | jobs/HTTPServerChecker.py | 24 | ||||
-rwxr-xr-x | jobs/JobBase.py | 85 | ||||
-rwxr-xr-x | jobs/JobSpawner.py | 4 | ||||
-rwxr-xr-x | jobs/PeerChecker.py | 61 | ||||
-rwxr-xr-x | jobs/TCPServerChecker.py | 22 | ||||
-rwxr-xr-x | jobs/__init__.py | 4 | ||||
-rw-r--r-- | jobstate.py | 61 | ||||
-rwxr-xr-x | samplejobs/BWAuthChecker.py | 7 | ||||
-rwxr-xr-x | samplejobs/MetricsChecker.py | 7 | ||||
-rwxr-xr-x | settings.cfg.example | 1 |
13 files changed, 296 insertions, 72 deletions
@@ -29,21 +29,22 @@ This wouldn't be any good if you couldn't specify your own custom jobs. There ar ### Inherit JobBase -JobBase is the base for a job, and should be used when you have a single, custom job you want to run. Your job needs to match the name of the file it is in. You should override two functions: +JobBase is the base for a job, and should be used when you have a single, custom job you want to run. Your job needs to match the name of the file it is in. You should override three (or four) functions: * executeEvery * This should return a JobFrequency constant indicating how often you want the job to run +* notifyOnFailureEvery + * This should return a JobFailureNotificationFrequency constant indicating how often you want to be notified about a (continually) failing job * execute - * This does the work - * It should _not_ return False if it fails, instead it should return False _only if it cannot send email_. If this function returns false, checker assumes it cannot send mail. - -An appropriate way to end the function would be: - - if not success: - return self.sendEmail("Failed executing bob-job", failureMessage) - else: - return True - + * This does the work. It returns True if the job succeeded or False if it didn't +* onFailure + * This is called if the job failed _and_ the admin should be notified. You should return if the email could be sent or not. + * E.G. return self.sendEmail("Job Failed", self.details_of_error) +* onStateChangeSuccess + * This is called if a) you specified JobFailureNotificationFrequency.ONSTATECHANGE in notifyOnFailureEvery + * A job was previously failing but just suceeded + * Like onFailure, it should return if the email could be sent or not. + * E.G. return self.sendEmail("Job Suceeded", "") ### Inherit JobSpawner diff --git a/jobmanager.py b/jobmanager.py index 63ae16b..fa55883 100755 --- a/jobmanager.py +++ b/jobmanager.py @@ -1,30 +1,75 @@ #!/usr/bin/env python +import os import time import logging +import datetime import requests -from jobs import JobFinder +from jobs import JobFinder, JobBase +from jobstate import JobState class JobManager: def __init__(self, config): jobsFinder = JobFinder(config) self.jobs = jobsFinder.get_jobs() self.config = config + self.statefile = config.get('general', 'statefile') + self._load_state() + + def _load_state(self): + self.state = {} + if not os.path.isfile(self.statefile): + logging.warn("Could not find statefile at " + self.statefile + "; creating a new one.") + else: + f = open(self.statefile, "r") + lines = f.readlines() + for l in lines: + s = JobState.Parse(l) + self.state[s.name] = s + f.close() + + def _save_state(self): + f = open(self.statefile, "w") + for i in self.state: + f.write(self.state[i].serialize()) + f.close() def list_jobs(self): return self.jobs def execute_jobs(self, cronmode): logging.info("Executing jobs...") - success = True + emailWorks = True for thisJob in self.jobs: - thisJob.setConfig(self.config) if thisJob.shouldExecute(cronmode): + try: + lastRunStatus = self.state[thisJob.getStateName()] + except: + logging.warn("No state was found for " + thisJob.getStateName() + \ + "\nMaking up a dummy state for it.") + lastRunStatus = self.state[thisJob.getStateName()] = JobState.Empty(thisJob.getStateName()) + logging.info("Executing " + thisJob.getName()) if not thisJob.execute(): - success = False - return success + #Unsuccessful run + logging.info("Execution of " + thisJob.getName() + " failed") + if thisJob.shouldNotifyFailure(lastRunStatus): + lastRunStatus.markFailedAndNotify() + logging.info("Notifying of failure for " + thisJob.getName()) + if not thisJob.onFailure(): + emailWorks = False + else: + lastRunStatus.markFailedNoNotify() + else: + #Successful Run + logging.info("Execution of " + thisJob.getName() + " succeeded") + if lastRunStatus.CurrentStateSuccess == False and thisJob.notifyOnFailureEvery() == JobBase.FailureNotificationFrequency.ONSTATECHANGE: + if not thisJob.onStateChangeSuccess(): + emailWorks = False + lastRunStatus.markSuccessful() + self._save_state() + return emailWorks def mark_jobs_ran(self): logging.debug("Marking jobs as run successfully.") diff --git a/jobs/EmailChecker.py b/jobs/EmailChecker.py index 925e0db..939d5b8 100755 --- a/jobs/EmailChecker.py +++ b/jobs/EmailChecker.py @@ -12,6 +12,8 @@ import JobBase class EmailChecker(JobBase.JobBase): def executeEvery(self): return JobBase.JobFrequency.HOUR + def notifyOnFailureEvery(self): + return JobBase.JobFailureNotificationFrequency.EVERYTIME def execute(self): USER = self.config.get('email', 'user') PASS = self.config.get('email', 'pass') @@ -47,9 +49,9 @@ class EmailChecker(JobBase.JobBase): foundSubject = True M.close() M.logout() - if not foundSubject: - #This may not work, but try anyway - self.sendEmail("Email Fetch Failure", logdetails) - return False - else: - return True + + self.logdetails = logdetails + return foundSubject + def onFailure(self): + return self.sendEmail("Email Fetch Failure", self.logdetails) + diff --git a/jobs/HTTPServerChecker.py b/jobs/HTTPServerChecker.py index ec8eda1..ec2a9d6 100755 --- a/jobs/HTTPServerChecker.py +++ b/jobs/HTTPServerChecker.py @@ -8,29 +8,37 @@ import JobSpawner class HTTPServerChecker(JobSpawner.JobSpawner):
servers = [
- #("http://example.com", JobBase.JobFrequency.MINUTE),
- #("https://exampletwo.com", JobBase.JobFrequency.MINUTE)
+ #("http://example.com", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME),
+ #("https://exampletwo.com", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME)
]
class ServerChecker(JobBase.JobBase):
- def __init__(self, url, frequency):
+ def __init__(self, config, url, frequency, failureNotificationFrequency):
+ self.config = config
self.url = url
self.frequency = frequency
+ self.failureNotificationFrequency = failureNotificationFrequency
def getName(self):
return str(self.__class__) + " for " + self.url
def executeEvery(self):
return self.frequency
+ def notifyOnFailureEvery(self):
+ return self.failureNotificationFrequency
def execute(self):
try:
requests.get(self.url)
return True
except:
- msg = "Could not hit server " + self.url
- logging.warn(msg)
- return self.sendEmail(msg, "")
+ self.failuremsg = "Could not hit server " + self.url
+ logging.warn(self.failuremsg)
+ return False
+ def onFailure(self):
+ return self.sendEmail(self.failuremsg, "")
+ def onStateChangeSuccess(self):
+ return self.sendEmail("Successfully hit " + self.url, "")
- def get_sub_jobs(self):
+ def get_sub_jobs(self, config):
for s in self.servers:
- yield self.ServerChecker(s[0], s[1])
+ yield self.ServerChecker(config, s[0], s[1], s[2])
diff --git a/jobs/JobBase.py b/jobs/JobBase.py index a7f02c9..29ca443 100755 --- a/jobs/JobBase.py +++ b/jobs/JobBase.py @@ -1,7 +1,9 @@ #!/usr/bin/env python +import time import random import logging +import datetime import smtplib @@ -11,27 +13,100 @@ class JobFrequency: DAY = "day" DAY_NOON = "day_noon" +class JobFailureNotificationFrequency: + EVERYTIME = "every" + EVERYFIVEMINUTES = "5min" + EVERYTENMINUTES = "10min" + EVERYHOUR = "hour" + ONSTATECHANGE = "state_change" + class JobBase: - def __init__(self): - self.config = None + def __init__(self, config): + self.config = config + + """ Return a friendly name to identify this Job""" def getName(self): return str(self.__class__) + + """Return a non-friendly, guarenteed-unique name to identify this Job + Needed to keep track of the job's run history. + Takes into account the contructor arguments to uniquely identify JobSpawner-jobs""" + def getStateName(self): + return self.getName() + + """Returns True if the job should execute this cron-run""" def shouldExecute(self, cronmode): frequency = self.executeEvery() if cronmode == frequency: return True return False - def setConfig(self, config): - self.config = config - + + """Returns True if the jobmanager should call 'onFailure' to alert the admin""" + def shouldNotifyFailure(self, jobState): + notifyFrequency = self.notifyOnFailureEvery() + if notifyFrequency == JobFailureNotificationFrequency.EVERYTIME: + return True + elif notifyFrequency == JobFailureNotificationFrequency.EVERYFIVEMINUTES: + now = time.time() + lastNotify = jobState.LastNotifyTime + if now - lastNotify > datetime.timedelta(minutes=4, seconds=30): + return True + return False + elif notifyFrequency == JobFailureNotificationFrequency.EVERYTENMINUTES: + now = time.time() + lastNotify = jobState.LastNotifyTime + if now - lastNotify > datetime.timedelta(minutes=9, seconds=15): + return True + return False + elif notifyFrequency == JobFailureNotificationFrequency.EVERYHOUR: + now = time.time() + lastNotify = jobState.LastNotifyTime + if now - lastNotify > datetime.timedelta(minutes=59, seconds=0): + return True + return False + elif notifyFrequency == JobFailureNotificationFrequency.ONSTATECHANGE: + #Only notify if the last JobState was a Success + return jobState.CurrentStateSuccess + return True + + """Helper method to send email""" def sendEmail(self, subject, body, to=""): return sendEmail(self.config, subject, body, to) + + """OVERRIDE ME + Returns a JobFrequency indicating how often the job should be run.""" def executeEvery(self): pass + + """OVERRIDE ME + Returns a JobFailureNotificationFrequency indicating how often a failure + notification email should be sent""" + def notifyOnFailureEvery(self): + pass + + """OVERRIDE ME + Executes the job's actions, and returns true to indicate the job succeeded.""" def execute(self): pass + """OVERRIDE ME + Notify the admin the job failed. Returns True if the email could be + successfully sent. + Example: return self.sendEmail(self.subject, self.body, self.notificationAddress)""" + def onFailure(self): + pass + + """OVERRIDE ME + Notify the admin the job succeeded (when it was previously failing). Only used for + JobFailureNotificationFrequency.ONSTATECHANGE + + Returns True if the email could be successfully sent. + Example: return self.sendEmail(self.subject, self.body, self.notificationAddress)""" + def onStateChangeSuccess(self): + log.warn(self.getName() + " did not override onStateChangeSuccess") + return True + def sendEmail(config, subject, body, to=""): FROM = config.get('email', 'user') PASS = config.get('email', 'pass') diff --git a/jobs/JobSpawner.py b/jobs/JobSpawner.py index 3d09693..50f3043 100755 --- a/jobs/JobSpawner.py +++ b/jobs/JobSpawner.py @@ -1,5 +1,7 @@ #!/usr/bin/env python
class JobSpawner:
- def get_sub_jobs(self):
+ """OVERRIDE ME
+ Returns an array (or using 'yield') of Job objects to run"""
+ def get_sub_jobs(self, config):
pass
diff --git a/jobs/PeerChecker.py b/jobs/PeerChecker.py index f17da53..8211472 100755 --- a/jobs/PeerChecker.py +++ b/jobs/PeerChecker.py @@ -9,46 +9,57 @@ import requests import JobBase
-class PeerChecker(JobBase.JobBase):
- def executeEvery(self):
- return JobBase.JobFrequency.HOUR
- def execute(self):
- testSuccess = True
- peers = self.config.items('peers')
- for p in peers:
- peer = p[1].split(',')
+class PeerChecker(JobSpawner.JobSpawner):
+ class IndividualPeerChecker(JobBase.JobBase):
+ def __init__(self, config, checkurl, notificationAddress):
+ self.checkurl = checkurl
+ self.notificationAddress = notificationAddress
+
+ def executeEvery(self):
+ return JobBase.JobFrequency.HOUR
+ def notifyOnFailureEvery(self):
+ return JobBase.JobFailureNotificationFrequency.EVERYTIME
+ def execute(self):
peerOK = False
- subject = ""
- body = ""
+ self.subject = ""
+ self.body = ""
try:
- response = requests.get(peer[0])
+ response = requests.get(self.checkurl)
if response.status_code != 200:
peerOK = False
- subject = peer[0] + " returned a non-standard status code."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " returned a non-standard status code."
+ self.body = str(response.status_code) + "\n" + response.content
else:
if "True" in response.content:
peerOK = True
elif "MailProblem" in response.content:
peerOK = False
- subject = peer[0] + " reports it cannot send email."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " reports it cannot send email."
+ self.body = str(response.status_code) + "\n" + response.content
elif "JobProblem" in response.content:
peerOK = False
- subject = peer[0] + " reports its jobs are not running."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " reports its jobs are not running."
+ self.body = str(response.status_code) + "\n" + response.content
else:
peerOK = False
- subject = peer[0] + " had an unexpected response."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " had an unexpected response."
+ self.body = str(response.status_code) + "\n" + response.content
except Exception as e:
peerOK = False
- subject = peer[0] + " is not responding."
- body = str(e)
+ self.subject = self.checkurl + " is not responding."
+ self.body = str(e)
+ return peerOK
- if not peerOK:
- if not self.sendEmail(subject, body, peer[1]):
- testSuccess = False
- return testSuccess
+ return peerOK:
+ def onFailure(self):
+ return self.sendEmail(self.subject, self.body, self.notificationAddress)
+ def onStateChangeSuccess(self):
+ return self.sendEmail("Successfully hit " + self.checkurl, "", self.notificationAddress)
+
+ def get_sub_jobs(self, config):
+ peers = config.items('peers')
+ for p in peers:
+ (address, email) = p[1].split(',')
+ yield self.IndividualPeerChecker(config, address, email)
diff --git a/jobs/TCPServerChecker.py b/jobs/TCPServerChecker.py index 711047b..642e188 100755 --- a/jobs/TCPServerChecker.py +++ b/jobs/TCPServerChecker.py @@ -9,20 +9,24 @@ import JobSpawner class TCPServerChecker(JobSpawner.JobSpawner):
servers = [
- #("example.com", 53, "example.com:tcpdns", JobBase.JobFrequency.MINUTE),
+ #("example.com", 53, "example.com:tcpdns", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME)
]
class ServerChecker(JobBase.JobBase):
- def __init__(self, ip, port, friendlyName, frequency):
+ def __init__(self, config, ip, port, friendlyName, frequency, failureNotificationFrequency):
+ self.config = config
self.ip = ip
self.port = port
self.friendlyName = friendlyName + "(" + self.ip + ":" + str(self.port) + ")"
self.frequency = frequency
+ self.failureNotificationFrequency = failureNotificationFrequency
def getName(self):
return str(self.__class__) + " for " + self.friendlyName
def executeEvery(self):
return self.frequency
+ def notifyOnFailureEvery(self):
+ return self.failureNotificationFrequency
def execute(self):
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -30,12 +34,16 @@ class TCPServerChecker(JobSpawner.JobSpawner): s.close()
return True
except:
- msg = "Could not hit server " + self.friendlyName
- logging.warn(msg)
- return self.sendEmail(msg, "")
+ self.failuremsg = "Could not hit server " + self.friendlyName
+ logging.warn(self.failuremsg)
+ return False
+ def onFailure(self):
+ return self.sendEmail(self.failuremsg, "")
+ def onStateChangeSuccess(self):
+ return self.sendEmail("Successfully hit " + self.friendlyName, "")
- def get_sub_jobs(self):
+ def get_sub_jobs(self, config):
for s in self.servers:
- yield self.ServerChecker(s[0], s[1], s[2], s[3])
+ yield self.ServerChecker(config, s[0], s[1], s[2], s[3], s[4])
diff --git a/jobs/__init__.py b/jobs/__init__.py index 9955164..0e780eb 100755 --- a/jobs/__init__.py +++ b/jobs/__init__.py @@ -44,10 +44,10 @@ class JobFinder: # It has to do with JobBase being imported multiple times (within jobs) or something if base.__name__ == 'JobBase': # A job was found, keep it - self._jobs.add(obj()) + self._jobs.add(obj(self.config)) elif base.__name__ == 'JobSpawner': spawner = obj() - for j in spawner.get_sub_jobs(): + for j in spawner.get_sub_jobs(self.config): self._jobs.add(j) diff --git a/jobstate.py b/jobstate.py new file mode 100644 index 0000000..4df0de0 --- /dev/null +++ b/jobstate.py @@ -0,0 +1,61 @@ +#!/usr/bin/env python + +import time +import logging +import datetime + +class JobState: + def __init__(self, name): + self.name = name + self.CurrentStateSuccess = True + + def markFailedAndNotify(self): + if self.CurrentStateSuccess: + self.CurrentStateSuccess = False + self.FirstFailureTime = time.time() + self.LastNotifyTime = self.FirstFailureTime + else: + self.LastNotifyTime = time.time() + + def markFailedNoNotify(self): + if self.CurrentStateSuccess: + logging.warn("Somehow we called markFailedNoNotify, on a success condition, without notifying the user") + self.CurrentStateSuccess = False + self.FirstFailureTime = time.time() + self.LastNotifyTime = 0 + else: + pass + + def markSuccessful(self): + if self.CurrentStateSuccess: + pass + else: + self.CurrentStateSuccess = True + self.FirstFailureTime = 0 + self.LastNotifyTime = 0 + + def serialize(self): + ret = self.name + "|" + ret += "Succeeding" if self.CurrentStateSuccess else "Failing" + ret += "|" + str(self.FirstFailureTime) + ret += "|" + str(self.LastNotifyTime) + return ret + + @staticmethod + def Parse(line): + s = JobState() + + line = line.strip() + parts = line.split("|") + + s.name = parts[0] + s.CurrentStateSuccess = True if parts[1] == "Succeeding" else False + s.FirstFailureTime = float(parts[2]) + s.LastNotifyTime = float(parts[3]) + + return s + + @staticmethod + def Empty(name): + s = JobState(name) + return s
\ No newline at end of file diff --git a/samplejobs/BWAuthChecker.py b/samplejobs/BWAuthChecker.py index 9116857..2b88e95 100755 --- a/samplejobs/BWAuthChecker.py +++ b/samplejobs/BWAuthChecker.py @@ -13,6 +13,8 @@ import JobBase class BWAuthChecker(JobBase.JobBase): def executeEvery(self): return JobBase.JobFrequency.HOUR + def notifyOnFailureEvery(self): + return JobBase.JobFailureNotificationFrequency.EVERYTIME def execute(self): body = "" url = "https://example.com/bwauth/bwscan.V3BandwidthsFile" @@ -34,6 +36,9 @@ class BWAuthChecker(JobBase.JobBase): if body: logging.warn("tor bwauth is broken?") logging.warn(body) - return self.sendEmail("tor bwauth is broken?", body) + self.logdetails = body + return False else: return True + def onFailure(self): + return self.sendEmail("tor bwauth is broken?", self.logdetails) diff --git a/samplejobs/MetricsChecker.py b/samplejobs/MetricsChecker.py index d46c7d3..1d29a81 100755 --- a/samplejobs/MetricsChecker.py +++ b/samplejobs/MetricsChecker.py @@ -12,6 +12,8 @@ import JobBase class MetricsChecker(JobBase.JobBase): def executeEvery(self): return JobBase.JobFrequency.DAY_NOON + def notifyOnFailureEvery(self): + return JobBase.JobFailureNotificationFrequency.EVERYTIME def execute(self): body = "" ys = datetime.date.today() - datetime.timedelta(hours=24) @@ -27,6 +29,9 @@ class MetricsChecker(JobBase.JobBase): if body: logging.warn("tor metrics is broken?") logging.warn(body) - return self.sendEmail("tor metrics is broken?", body) + self.logdetails = body + return False else: return True + def onFailure(self): + return self.sendEmail("tor metrics is broken?", self.logdetails) diff --git a/settings.cfg.example b/settings.cfg.example index 3ce86f2..ffac31e 100755 --- a/settings.cfg.example +++ b/settings.cfg.example @@ -1,6 +1,7 @@ [general]
servername=uniqueservername
alertcontact=youremail@example.com
+statefile=jobrunhistory.db
[email]
user=agmailaccountyoucreate@gmail.com
|