aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorTom Ritter <tom@ritter.vg>2016-01-31 13:22:08 -0600
committerTom Ritter <tom@ritter.vg>2016-01-31 13:22:08 -0600
commit3bea3bae59e7404b286b5bf97a6270270bfadd6c (patch)
treec9d1d7cf76681420587198d09abda761912c47c4
parent1a8b46d940d3a4bc06700d15307191bb10008ea6 (diff)
Refactor lots of things to allow you to be notified every so often, instead of every single time.
-rw-r--r--README.md23
-rwxr-xr-xjobmanager.py55
-rwxr-xr-xjobs/EmailChecker.py14
-rwxr-xr-xjobs/HTTPServerChecker.py24
-rwxr-xr-xjobs/JobBase.py85
-rwxr-xr-xjobs/JobSpawner.py4
-rwxr-xr-xjobs/PeerChecker.py61
-rwxr-xr-xjobs/TCPServerChecker.py22
-rwxr-xr-xjobs/__init__.py4
-rw-r--r--jobstate.py61
-rwxr-xr-xsamplejobs/BWAuthChecker.py7
-rwxr-xr-xsamplejobs/MetricsChecker.py7
-rwxr-xr-xsettings.cfg.example1
13 files changed, 296 insertions, 72 deletions
diff --git a/README.md b/README.md
index 615af3d..9f3de2c 100644
--- a/README.md
+++ b/README.md
@@ -29,21 +29,22 @@ This wouldn't be any good if you couldn't specify your own custom jobs. There ar
### Inherit JobBase
-JobBase is the base for a job, and should be used when you have a single, custom job you want to run. Your job needs to match the name of the file it is in. You should override two functions:
+JobBase is the base for a job, and should be used when you have a single, custom job you want to run. Your job needs to match the name of the file it is in. You should override three (or four) functions:
* executeEvery
* This should return a JobFrequency constant indicating how often you want the job to run
+* notifyOnFailureEvery
+ * This should return a JobFailureNotificationFrequency constant indicating how often you want to be notified about a (continually) failing job
* execute
- * This does the work
- * It should _not_ return False if it fails, instead it should return False _only if it cannot send email_. If this function returns false, checker assumes it cannot send mail.
-
-An appropriate way to end the function would be:
-
- if not success:
- return self.sendEmail("Failed executing bob-job", failureMessage)
- else:
- return True
-
+ * This does the work. It returns True if the job succeeded or False if it didn't
+* onFailure
+ * This is called if the job failed _and_ the admin should be notified. You should return if the email could be sent or not.
+ * E.G. return self.sendEmail("Job Failed", self.details_of_error)
+* onStateChangeSuccess
+ * This is called if a) you specified JobFailureNotificationFrequency.ONSTATECHANGE in notifyOnFailureEvery
+ * A job was previously failing but just suceeded
+ * Like onFailure, it should return if the email could be sent or not.
+ * E.G. return self.sendEmail("Job Suceeded", "")
### Inherit JobSpawner
diff --git a/jobmanager.py b/jobmanager.py
index 63ae16b..fa55883 100755
--- a/jobmanager.py
+++ b/jobmanager.py
@@ -1,30 +1,75 @@
#!/usr/bin/env python
+import os
import time
import logging
+import datetime
import requests
-from jobs import JobFinder
+from jobs import JobFinder, JobBase
+from jobstate import JobState
class JobManager:
def __init__(self, config):
jobsFinder = JobFinder(config)
self.jobs = jobsFinder.get_jobs()
self.config = config
+ self.statefile = config.get('general', 'statefile')
+ self._load_state()
+
+ def _load_state(self):
+ self.state = {}
+ if not os.path.isfile(self.statefile):
+ logging.warn("Could not find statefile at " + self.statefile + "; creating a new one.")
+ else:
+ f = open(self.statefile, "r")
+ lines = f.readlines()
+ for l in lines:
+ s = JobState.Parse(l)
+ self.state[s.name] = s
+ f.close()
+
+ def _save_state(self):
+ f = open(self.statefile, "w")
+ for i in self.state:
+ f.write(self.state[i].serialize())
+ f.close()
def list_jobs(self):
return self.jobs
def execute_jobs(self, cronmode):
logging.info("Executing jobs...")
- success = True
+ emailWorks = True
for thisJob in self.jobs:
- thisJob.setConfig(self.config)
if thisJob.shouldExecute(cronmode):
+ try:
+ lastRunStatus = self.state[thisJob.getStateName()]
+ except:
+ logging.warn("No state was found for " + thisJob.getStateName() + \
+ "\nMaking up a dummy state for it.")
+ lastRunStatus = self.state[thisJob.getStateName()] = JobState.Empty(thisJob.getStateName())
+
logging.info("Executing " + thisJob.getName())
if not thisJob.execute():
- success = False
- return success
+ #Unsuccessful run
+ logging.info("Execution of " + thisJob.getName() + " failed")
+ if thisJob.shouldNotifyFailure(lastRunStatus):
+ lastRunStatus.markFailedAndNotify()
+ logging.info("Notifying of failure for " + thisJob.getName())
+ if not thisJob.onFailure():
+ emailWorks = False
+ else:
+ lastRunStatus.markFailedNoNotify()
+ else:
+ #Successful Run
+ logging.info("Execution of " + thisJob.getName() + " succeeded")
+ if lastRunStatus.CurrentStateSuccess == False and thisJob.notifyOnFailureEvery() == JobBase.FailureNotificationFrequency.ONSTATECHANGE:
+ if not thisJob.onStateChangeSuccess():
+ emailWorks = False
+ lastRunStatus.markSuccessful()
+ self._save_state()
+ return emailWorks
def mark_jobs_ran(self):
logging.debug("Marking jobs as run successfully.")
diff --git a/jobs/EmailChecker.py b/jobs/EmailChecker.py
index 925e0db..939d5b8 100755
--- a/jobs/EmailChecker.py
+++ b/jobs/EmailChecker.py
@@ -12,6 +12,8 @@ import JobBase
class EmailChecker(JobBase.JobBase):
def executeEvery(self):
return JobBase.JobFrequency.HOUR
+ def notifyOnFailureEvery(self):
+ return JobBase.JobFailureNotificationFrequency.EVERYTIME
def execute(self):
USER = self.config.get('email', 'user')
PASS = self.config.get('email', 'pass')
@@ -47,9 +49,9 @@ class EmailChecker(JobBase.JobBase):
foundSubject = True
M.close()
M.logout()
- if not foundSubject:
- #This may not work, but try anyway
- self.sendEmail("Email Fetch Failure", logdetails)
- return False
- else:
- return True
+
+ self.logdetails = logdetails
+ return foundSubject
+ def onFailure(self):
+ return self.sendEmail("Email Fetch Failure", self.logdetails)
+
diff --git a/jobs/HTTPServerChecker.py b/jobs/HTTPServerChecker.py
index ec8eda1..ec2a9d6 100755
--- a/jobs/HTTPServerChecker.py
+++ b/jobs/HTTPServerChecker.py
@@ -8,29 +8,37 @@ import JobSpawner
class HTTPServerChecker(JobSpawner.JobSpawner):
servers = [
- #("http://example.com", JobBase.JobFrequency.MINUTE),
- #("https://exampletwo.com", JobBase.JobFrequency.MINUTE)
+ #("http://example.com", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME),
+ #("https://exampletwo.com", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME)
]
class ServerChecker(JobBase.JobBase):
- def __init__(self, url, frequency):
+ def __init__(self, config, url, frequency, failureNotificationFrequency):
+ self.config = config
self.url = url
self.frequency = frequency
+ self.failureNotificationFrequency = failureNotificationFrequency
def getName(self):
return str(self.__class__) + " for " + self.url
def executeEvery(self):
return self.frequency
+ def notifyOnFailureEvery(self):
+ return self.failureNotificationFrequency
def execute(self):
try:
requests.get(self.url)
return True
except:
- msg = "Could not hit server " + self.url
- logging.warn(msg)
- return self.sendEmail(msg, "")
+ self.failuremsg = "Could not hit server " + self.url
+ logging.warn(self.failuremsg)
+ return False
+ def onFailure(self):
+ return self.sendEmail(self.failuremsg, "")
+ def onStateChangeSuccess(self):
+ return self.sendEmail("Successfully hit " + self.url, "")
- def get_sub_jobs(self):
+ def get_sub_jobs(self, config):
for s in self.servers:
- yield self.ServerChecker(s[0], s[1])
+ yield self.ServerChecker(config, s[0], s[1], s[2])
diff --git a/jobs/JobBase.py b/jobs/JobBase.py
index a7f02c9..29ca443 100755
--- a/jobs/JobBase.py
+++ b/jobs/JobBase.py
@@ -1,7 +1,9 @@
#!/usr/bin/env python
+import time
import random
import logging
+import datetime
import smtplib
@@ -11,27 +13,100 @@ class JobFrequency:
DAY = "day"
DAY_NOON = "day_noon"
+class JobFailureNotificationFrequency:
+ EVERYTIME = "every"
+ EVERYFIVEMINUTES = "5min"
+ EVERYTENMINUTES = "10min"
+ EVERYHOUR = "hour"
+ ONSTATECHANGE = "state_change"
+
class JobBase:
- def __init__(self):
- self.config = None
+ def __init__(self, config):
+ self.config = config
+
+ """ Return a friendly name to identify this Job"""
def getName(self):
return str(self.__class__)
+
+ """Return a non-friendly, guarenteed-unique name to identify this Job
+ Needed to keep track of the job's run history.
+ Takes into account the contructor arguments to uniquely identify JobSpawner-jobs"""
+ def getStateName(self):
+ return self.getName()
+
+ """Returns True if the job should execute this cron-run"""
def shouldExecute(self, cronmode):
frequency = self.executeEvery()
if cronmode == frequency:
return True
return False
- def setConfig(self, config):
- self.config = config
-
+
+ """Returns True if the jobmanager should call 'onFailure' to alert the admin"""
+ def shouldNotifyFailure(self, jobState):
+ notifyFrequency = self.notifyOnFailureEvery()
+ if notifyFrequency == JobFailureNotificationFrequency.EVERYTIME:
+ return True
+ elif notifyFrequency == JobFailureNotificationFrequency.EVERYFIVEMINUTES:
+ now = time.time()
+ lastNotify = jobState.LastNotifyTime
+ if now - lastNotify > datetime.timedelta(minutes=4, seconds=30):
+ return True
+ return False
+ elif notifyFrequency == JobFailureNotificationFrequency.EVERYTENMINUTES:
+ now = time.time()
+ lastNotify = jobState.LastNotifyTime
+ if now - lastNotify > datetime.timedelta(minutes=9, seconds=15):
+ return True
+ return False
+ elif notifyFrequency == JobFailureNotificationFrequency.EVERYHOUR:
+ now = time.time()
+ lastNotify = jobState.LastNotifyTime
+ if now - lastNotify > datetime.timedelta(minutes=59, seconds=0):
+ return True
+ return False
+ elif notifyFrequency == JobFailureNotificationFrequency.ONSTATECHANGE:
+ #Only notify if the last JobState was a Success
+ return jobState.CurrentStateSuccess
+ return True
+
+ """Helper method to send email"""
def sendEmail(self, subject, body, to=""):
return sendEmail(self.config, subject, body, to)
+
+ """OVERRIDE ME
+ Returns a JobFrequency indicating how often the job should be run."""
def executeEvery(self):
pass
+
+ """OVERRIDE ME
+ Returns a JobFailureNotificationFrequency indicating how often a failure
+ notification email should be sent"""
+ def notifyOnFailureEvery(self):
+ pass
+
+ """OVERRIDE ME
+ Executes the job's actions, and returns true to indicate the job succeeded."""
def execute(self):
pass
+ """OVERRIDE ME
+ Notify the admin the job failed. Returns True if the email could be
+ successfully sent.
+ Example: return self.sendEmail(self.subject, self.body, self.notificationAddress)"""
+ def onFailure(self):
+ pass
+
+ """OVERRIDE ME
+ Notify the admin the job succeeded (when it was previously failing). Only used for
+ JobFailureNotificationFrequency.ONSTATECHANGE
+
+ Returns True if the email could be successfully sent.
+ Example: return self.sendEmail(self.subject, self.body, self.notificationAddress)"""
+ def onStateChangeSuccess(self):
+ log.warn(self.getName() + " did not override onStateChangeSuccess")
+ return True
+
def sendEmail(config, subject, body, to=""):
FROM = config.get('email', 'user')
PASS = config.get('email', 'pass')
diff --git a/jobs/JobSpawner.py b/jobs/JobSpawner.py
index 3d09693..50f3043 100755
--- a/jobs/JobSpawner.py
+++ b/jobs/JobSpawner.py
@@ -1,5 +1,7 @@
#!/usr/bin/env python
class JobSpawner:
- def get_sub_jobs(self):
+ """OVERRIDE ME
+ Returns an array (or using 'yield') of Job objects to run"""
+ def get_sub_jobs(self, config):
pass
diff --git a/jobs/PeerChecker.py b/jobs/PeerChecker.py
index f17da53..8211472 100755
--- a/jobs/PeerChecker.py
+++ b/jobs/PeerChecker.py
@@ -9,46 +9,57 @@ import requests
import JobBase
-class PeerChecker(JobBase.JobBase):
- def executeEvery(self):
- return JobBase.JobFrequency.HOUR
- def execute(self):
- testSuccess = True
- peers = self.config.items('peers')
- for p in peers:
- peer = p[1].split(',')
+class PeerChecker(JobSpawner.JobSpawner):
+ class IndividualPeerChecker(JobBase.JobBase):
+ def __init__(self, config, checkurl, notificationAddress):
+ self.checkurl = checkurl
+ self.notificationAddress = notificationAddress
+
+ def executeEvery(self):
+ return JobBase.JobFrequency.HOUR
+ def notifyOnFailureEvery(self):
+ return JobBase.JobFailureNotificationFrequency.EVERYTIME
+ def execute(self):
peerOK = False
- subject = ""
- body = ""
+ self.subject = ""
+ self.body = ""
try:
- response = requests.get(peer[0])
+ response = requests.get(self.checkurl)
if response.status_code != 200:
peerOK = False
- subject = peer[0] + " returned a non-standard status code."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " returned a non-standard status code."
+ self.body = str(response.status_code) + "\n" + response.content
else:
if "True" in response.content:
peerOK = True
elif "MailProblem" in response.content:
peerOK = False
- subject = peer[0] + " reports it cannot send email."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " reports it cannot send email."
+ self.body = str(response.status_code) + "\n" + response.content
elif "JobProblem" in response.content:
peerOK = False
- subject = peer[0] + " reports its jobs are not running."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " reports its jobs are not running."
+ self.body = str(response.status_code) + "\n" + response.content
else:
peerOK = False
- subject = peer[0] + " had an unexpected response."
- body = str(response.status_code) + "\n" + response.content
+ self.subject = self.checkurl + " had an unexpected response."
+ self.body = str(response.status_code) + "\n" + response.content
except Exception as e:
peerOK = False
- subject = peer[0] + " is not responding."
- body = str(e)
+ self.subject = self.checkurl + " is not responding."
+ self.body = str(e)
+ return peerOK
- if not peerOK:
- if not self.sendEmail(subject, body, peer[1]):
- testSuccess = False
- return testSuccess
+ return peerOK:
+ def onFailure(self):
+ return self.sendEmail(self.subject, self.body, self.notificationAddress)
+ def onStateChangeSuccess(self):
+ return self.sendEmail("Successfully hit " + self.checkurl, "", self.notificationAddress)
+
+ def get_sub_jobs(self, config):
+ peers = config.items('peers')
+ for p in peers:
+ (address, email) = p[1].split(',')
+ yield self.IndividualPeerChecker(config, address, email)
diff --git a/jobs/TCPServerChecker.py b/jobs/TCPServerChecker.py
index 711047b..642e188 100755
--- a/jobs/TCPServerChecker.py
+++ b/jobs/TCPServerChecker.py
@@ -9,20 +9,24 @@ import JobSpawner
class TCPServerChecker(JobSpawner.JobSpawner):
servers = [
- #("example.com", 53, "example.com:tcpdns", JobBase.JobFrequency.MINUTE),
+ #("example.com", 53, "example.com:tcpdns", JobBase.JobFrequency.MINUTE, JobBase.JobFailureNotificationFrequency.EVERYTIME)
]
class ServerChecker(JobBase.JobBase):
- def __init__(self, ip, port, friendlyName, frequency):
+ def __init__(self, config, ip, port, friendlyName, frequency, failureNotificationFrequency):
+ self.config = config
self.ip = ip
self.port = port
self.friendlyName = friendlyName + "(" + self.ip + ":" + str(self.port) + ")"
self.frequency = frequency
+ self.failureNotificationFrequency = failureNotificationFrequency
def getName(self):
return str(self.__class__) + " for " + self.friendlyName
def executeEvery(self):
return self.frequency
+ def notifyOnFailureEvery(self):
+ return self.failureNotificationFrequency
def execute(self):
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@@ -30,12 +34,16 @@ class TCPServerChecker(JobSpawner.JobSpawner):
s.close()
return True
except:
- msg = "Could not hit server " + self.friendlyName
- logging.warn(msg)
- return self.sendEmail(msg, "")
+ self.failuremsg = "Could not hit server " + self.friendlyName
+ logging.warn(self.failuremsg)
+ return False
+ def onFailure(self):
+ return self.sendEmail(self.failuremsg, "")
+ def onStateChangeSuccess(self):
+ return self.sendEmail("Successfully hit " + self.friendlyName, "")
- def get_sub_jobs(self):
+ def get_sub_jobs(self, config):
for s in self.servers:
- yield self.ServerChecker(s[0], s[1], s[2], s[3])
+ yield self.ServerChecker(config, s[0], s[1], s[2], s[3], s[4])
diff --git a/jobs/__init__.py b/jobs/__init__.py
index 9955164..0e780eb 100755
--- a/jobs/__init__.py
+++ b/jobs/__init__.py
@@ -44,10 +44,10 @@ class JobFinder:
# It has to do with JobBase being imported multiple times (within jobs) or something
if base.__name__ == 'JobBase':
# A job was found, keep it
- self._jobs.add(obj())
+ self._jobs.add(obj(self.config))
elif base.__name__ == 'JobSpawner':
spawner = obj()
- for j in spawner.get_sub_jobs():
+ for j in spawner.get_sub_jobs(self.config):
self._jobs.add(j)
diff --git a/jobstate.py b/jobstate.py
new file mode 100644
index 0000000..4df0de0
--- /dev/null
+++ b/jobstate.py
@@ -0,0 +1,61 @@
+#!/usr/bin/env python
+
+import time
+import logging
+import datetime
+
+class JobState:
+ def __init__(self, name):
+ self.name = name
+ self.CurrentStateSuccess = True
+
+ def markFailedAndNotify(self):
+ if self.CurrentStateSuccess:
+ self.CurrentStateSuccess = False
+ self.FirstFailureTime = time.time()
+ self.LastNotifyTime = self.FirstFailureTime
+ else:
+ self.LastNotifyTime = time.time()
+
+ def markFailedNoNotify(self):
+ if self.CurrentStateSuccess:
+ logging.warn("Somehow we called markFailedNoNotify, on a success condition, without notifying the user")
+ self.CurrentStateSuccess = False
+ self.FirstFailureTime = time.time()
+ self.LastNotifyTime = 0
+ else:
+ pass
+
+ def markSuccessful(self):
+ if self.CurrentStateSuccess:
+ pass
+ else:
+ self.CurrentStateSuccess = True
+ self.FirstFailureTime = 0
+ self.LastNotifyTime = 0
+
+ def serialize(self):
+ ret = self.name + "|"
+ ret += "Succeeding" if self.CurrentStateSuccess else "Failing"
+ ret += "|" + str(self.FirstFailureTime)
+ ret += "|" + str(self.LastNotifyTime)
+ return ret
+
+ @staticmethod
+ def Parse(line):
+ s = JobState()
+
+ line = line.strip()
+ parts = line.split("|")
+
+ s.name = parts[0]
+ s.CurrentStateSuccess = True if parts[1] == "Succeeding" else False
+ s.FirstFailureTime = float(parts[2])
+ s.LastNotifyTime = float(parts[3])
+
+ return s
+
+ @staticmethod
+ def Empty(name):
+ s = JobState(name)
+ return s \ No newline at end of file
diff --git a/samplejobs/BWAuthChecker.py b/samplejobs/BWAuthChecker.py
index 9116857..2b88e95 100755
--- a/samplejobs/BWAuthChecker.py
+++ b/samplejobs/BWAuthChecker.py
@@ -13,6 +13,8 @@ import JobBase
class BWAuthChecker(JobBase.JobBase):
def executeEvery(self):
return JobBase.JobFrequency.HOUR
+ def notifyOnFailureEvery(self):
+ return JobBase.JobFailureNotificationFrequency.EVERYTIME
def execute(self):
body = ""
url = "https://example.com/bwauth/bwscan.V3BandwidthsFile"
@@ -34,6 +36,9 @@ class BWAuthChecker(JobBase.JobBase):
if body:
logging.warn("tor bwauth is broken?")
logging.warn(body)
- return self.sendEmail("tor bwauth is broken?", body)
+ self.logdetails = body
+ return False
else:
return True
+ def onFailure(self):
+ return self.sendEmail("tor bwauth is broken?", self.logdetails)
diff --git a/samplejobs/MetricsChecker.py b/samplejobs/MetricsChecker.py
index d46c7d3..1d29a81 100755
--- a/samplejobs/MetricsChecker.py
+++ b/samplejobs/MetricsChecker.py
@@ -12,6 +12,8 @@ import JobBase
class MetricsChecker(JobBase.JobBase):
def executeEvery(self):
return JobBase.JobFrequency.DAY_NOON
+ def notifyOnFailureEvery(self):
+ return JobBase.JobFailureNotificationFrequency.EVERYTIME
def execute(self):
body = ""
ys = datetime.date.today() - datetime.timedelta(hours=24)
@@ -27,6 +29,9 @@ class MetricsChecker(JobBase.JobBase):
if body:
logging.warn("tor metrics is broken?")
logging.warn(body)
- return self.sendEmail("tor metrics is broken?", body)
+ self.logdetails = body
+ return False
else:
return True
+ def onFailure(self):
+ return self.sendEmail("tor metrics is broken?", self.logdetails)
diff --git a/settings.cfg.example b/settings.cfg.example
index 3ce86f2..ffac31e 100755
--- a/settings.cfg.example
+++ b/settings.cfg.example
@@ -1,6 +1,7 @@
[general]
servername=uniqueservername
alertcontact=youremail@example.com
+statefile=jobrunhistory.db
[email]
user=agmailaccountyoucreate@gmail.com