Reasonable working version

This commit is contained in:
2024-04-15 23:34:55 +02:00
parent 01d7851fcd
commit dfc5a4616d
2 changed files with 598 additions and 208 deletions

222
routerstats_client.py Executable file
View File

@@ -0,0 +1,222 @@
#!/usr/bin/env python3
import socket
import logging
import time
import os
import rrdtool
logging.basicConfig(
format='%(asctime)s %(funcName)20s %(levelname)-8s %(message)s',
level=logging.INFO,
datefmt='%Y-%m-%d %H:%M:%S')
class NotConnected(Exception):
    '''Raised when an operation is attempted without a live server
    connection, or when the connection is detected to be gone.

    Note: the class body must not call logging.error() directly — a
    statement there runs once at import time (logging a misleading
    "NotConnected error" on module load), not when the exception is
    actually raised.
    '''
class routerstats_client():
    '''TCP line-oriented client for the routerstats collector.

    Keeps a single socket and tracks its health in ``self.connected``;
    any detected failure raises ``NotConnected`` so the caller can
    reconnect.
    '''
    def __init__(self, host, port):
        # Connection parameters only; the socket is created in connect().
        self.host = host
        self.port = port
        self.connected = False

    def connect(self):
        '''Open a TCP connection to the configured host/port.

        Raises:
            ConnectionRefusedError: if the server is unreachable.
        '''
        self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            self.sock.connect((self.host, self.port))
            # The 1s timeout turns the blocking recv() below into a
            # polling loop: "timed out" is the expected idle path.
            self.sock.settimeout(1)
            logging.info('Connected to ' + str(self.host))
            self.connected = True
        except ConnectionRefusedError as error:
            logging.error('Could not connect to ' + str(self.host) + ':' + str(self.port))
            raise ConnectionRefusedError(error)

    def send(self, tosend):
        '''Send one newline-terminated UTF-8 line to the server.

        Raises:
            NotConnected: if the socket is not connected, or fails while
                sending, or turns out to have no peer afterwards.
        '''
        if self.connected:
            logging.debug('Sending ' + str(tosend))
            send_bytes = bytes(tosend + "\n", 'utf-8')
            try:
                self.sock.send(send_bytes)
            except OSError as error:
                logging.error('Cound not send to server: ' + str(error))
                self.connected = False
                raise NotConnected
            # send() can appear to succeed on a half-closed socket, so
            # getpeername() is used as an extra liveness probe.
            try:
                self.sock.getpeername()
            except OSError as error:
                logging.error('Could not send to server: ' + str(error))
                self.connected = False
                raise NotConnected
        else:
            logging.error('Not connected to server')
            raise NotConnected('Not connected to server')

    def recv(self):
        '''Read one newline-terminated line from the server.

        Returns:
            The stripped line as str, or None (implicit) when the socket
            timeout fires before a complete line arrives — the normal
            "nothing to read right now" path.

        Raises:
            NotConnected: on socket errors or repeated empty reads
                (peer closed the connection).
        '''
        if self.connected != True:
            logging.error('Trying to recv when not connected')
            raise NotConnected
        line = b''
        # Count of consecutive zero-byte reads; 5 in a row with no
        # complete line is treated as the peer having gone away.
        blanks = 0
        while True:
            try:
                # Byte-at-a-time read until '\n'; lines are short.
                toret = self.sock.recv(1)
                if toret:
                    line += toret
                    if blanks > 0:
                        blanks -= 1
                    if line.endswith(b'\n'):
                        #We're done for now, returning value
                        line = line.strip().decode('utf-8')
                        logging.debug('Received from server: ' + str(line))
                        return line
                else:
                    # recv() returned b'': remote end closed or flapping.
                    blanks += 1
                    if blanks >= 5:
                        blanks = 0
                        # break
                        logging.debug('Too many blank reads, and still no complete line')
                        self.connected = False
                        raise NotConnected
            except OSError as error:
                if str(error) == 'timed out':
                    #This is expected, as it is how we loop
                    break
                logging.error('OSError: ' + str(error))
                logging.debug('Got: ' + str(line))
                self.connected = False
                raise NotConnected(error)
            except TimeoutError:
                # NOTE(review): on modern Python socket timeouts raise
                # TimeoutError (an OSError subclass); this branch may be
                # shadowed by the OSError handler above — confirm.
                logging.error('Timeout while fetching data, got: ' + str(line))
                break
def handle_server_msg(received: str):
    '''Parse a "&lt;timestamp&gt; &lt;zone&gt;" server line into a per-zone count dict.

    Returns a dict with the integer timestamp and a count of 1 for the
    matching zone (net_dnat / loc-net), or None if the line is malformed.
    Unknown zones yield a dict with all counters at zero.
    '''
    fields = received.split()
    if len(fields) != 2:
        logging.error('Could not parse ' + str(received) + ' into two distinct values')
        return
    raw_ts, zone = fields
    try:
        parsed_ts = int(raw_ts)
    except ValueError:
        logging.error('Could not parse ' + str(raw_ts) + ' as an int')
        return
    counts = {'timestamp': parsed_ts, 'net_dnat': 0, 'loc-net': 0}
    try:
        counts[zone] += 1
    except KeyError as unknown_zone:
        logging.debug('Ignoring zone: ' + str(unknown_zone))
    logging.debug('Parsed to: ' + str(counts))
    return counts
def handle(received, client):
    '''Dispatch one line received from the server.

    Returns True for a handled ping, a parsed sample dict for data
    lines, or None when nothing was received.
    '''
    if not received:
        # recv() timed out with no data; nothing to do.
        return
    if received == 'ping':
        # Keepalive round-trip: answer so the server keeps us connected.
        client.send('pong')
        return True
    # Anything else should be a "<timestamp> <zone>" data line.
    return handle_server_msg(received)
class UpdateRRD:
    '''Handle updates to the rrd-file, since we cannot update more than once every second.

    Samples arriving within the same second are accumulated in
    ``self.toupdate`` and flushed to the rrd file by push().
    '''
    def __init__(self, rrdfile):
        self.rrdfile = rrdfile
        if not os.path.isfile(rrdfile):
            self.create()
        # Pending sample; 'timestamp' stays None until the first add().
        self.toupdate = {'timestamp': None, 'net_dnat': 0, 'loc-net': 0}
        # Pristine template used to reset the pending sample after a push.
        self.freshdict = self.toupdate.copy()

    def create(self):
        '''Create the rrd file: 10s step, two ABSOLUTE data sources,
        averaged RRAs covering one day and one month.'''
        rrdtool.create(self.rrdfile, "--start", "1000000000", "--step", "10", "DS:net_dnat:ABSOLUTE:10:0:U", "DS:loc-net:ABSOLUTE:10:0:U", "RRA:AVERAGE:0.5:1:1d", "RRA:AVERAGE:0.5:1:1M")
        logging.debug('Created rrdfile ' + str(self.rrdfile))

    def push(self):
        '''Write the pending sample to the rrd file.

        Returns True on success; False when there is nothing pending,
        the rrd file already holds newer data, or rrdtool rejects the
        update. The pending sample is reset in all but the "nothing
        pending" case.
        '''
        #Make sure we're not doing something stupid..
        info = rrdtool.info(self.rrdfile)
        if self.toupdate['timestamp'] is None:
            return False
        if info['last_update'] >= self.toupdate['timestamp']:
            logging.error('Trying to update when rrdfile is newer than our timestamp. Ignoring line and resetting.')
            self.toupdate = self.freshdict.copy()
            return False
        try:
            rrdtool.update(
                self.rrdfile,
                str(self.toupdate['timestamp'])
                + ':'
                + str(self.toupdate['net_dnat'])
                + ':'
                + str(self.toupdate['loc-net']))
            self.toupdate = self.freshdict.copy()
            logging.debug('Updated rrdfile')
            return True
        except rrdtool.OperationalError as error:
            logging.error(str(error))
            return False

    def add(self, input_dict):
        '''Merge a parsed sample {'timestamp', 'net_dnat', 'loc-net'}
        into the pending update, flushing first if the new sample is
        from a later second.
        '''
        if self.toupdate['timestamp'] is None:
            # Never touched: adopt the new sample. Copy it so the
            # in-place '+=' below never mutates the caller's dict
            # (the previous version aliased input_dict here).
            self.toupdate = input_dict.copy()
        elif input_dict['timestamp'] > self.toupdate['timestamp']:
            # Incoming data is fresher: flush what we have, then adopt.
            self.push()
            self.toupdate = input_dict.copy()
        elif input_dict['timestamp'] == self.toupdate['timestamp']:
            # Same second: accumulate the counters.
            self.toupdate['net_dnat'] += input_dict['net_dnat']
            self.toupdate['loc-net'] += input_dict['loc-net']
        else:
            # Remaining case: input_dict['timestamp'] < pending timestamp.
            logging.error('Newly fetched data is older than what we have in the queue already. Passing.')
def main():
    '''Connect to the local routerstats server, consume its event
    stream, and feed samples into the rrd file, reconnecting forever
    on failure.
    '''
    rrdfile = 'test.rrd'
    rrdupdater = UpdateRRD(rrdfile)
    client = routerstats_client('127.0.0.1', 9999)
    # Block until the first connection succeeds.
    while True:
        try:
            client.connect()
            break
        except ConnectionRefusedError:
            time.sleep(1)
    # tries: consecutive failed reconnects (caps the backoff at 5s).
    tries = 0
    # loops: recv() timeouts since the last message from the server.
    loops = 0
    while True:
        try:
            retval = handle(client.recv(), client)
            if retval:
                if retval == True:
                    # A ping/pong round-trip: the link is alive.
                    loops = 0
                else:
                    # A parsed data sample dict.
                    rrdupdater.add(retval)
            else:
                # Nothing received this second: flush pending data.
                rrdupdater.push()
                loops += 1
                if loops >= 60:
                    logging.error('No data in 60 seconds. We expect a ping/pong every 30. Lost connection, probably')
                    loops = 0
                    raise NotConnected
        except NotConnected:
            # Reconnect with a linear backoff capped at 5 seconds.
            try:
                client.connect()
                tries = 0
            except ConnectionRefusedError:
                logging.debug('ConnectionRefused')
                tries += 1
                if tries >= 5:
                    tries = 5
                time.sleep(tries)
        except ConnectionRefusedError:
            logging.debug('ConnectionRefused')
            time.sleep(1)


if __name__ == '__main__':
    main()

View File

@@ -1,261 +1,429 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import signal
import os import os
import io import io
import sys
import stat
from multiprocessing import Process, Queue from multiprocessing import Process, Queue
import socketserver
import threading
import queue import queue
from datetime import datetime, timedelta from datetime import datetime, timedelta
import logging import logging
import time import time
from http import server, HTTPStatus import pickle
from socketserver import TCPServer
import pandas as pd from setproctitle import setproctitle
import numpy as np
logging.basicConfig( logging.basicConfig(
format='%(asctime)s %(funcName)20s %(levelname)-8s %(message)s', format='%(asctime)s %(funcName)20s %(levelname)-8s %(message)s',
level=logging.INFO, level=logging.DEBUG,
datefmt='%Y-%m-%d %H:%M:%S') datefmt='%Y-%m-%d %H:%M:%S')
def datathingy(query_queue, query_response, collector_queue, signal_queue): class TimeToQuit(Exception):
'''Handle the datastore. Updating every N secs, fetching anything from the collector queue, and responding to the query queue''' '''Used to pass Quit to subthreads'''
pass
datastore = None class ReloadLog(Exception):
logging.debug('Datastore starting') '''Used to reload log file'''
pass
def filefetcher(filename: str, collector_queue, signal_queue, sleep_sec=0.5, seek_pos=None):
'''Latch onto a file, putting any new lines onto the queue.'''
setproctitle('routerstats-collector file-fetcher')
if float(sleep_sec) <= 0.0:
logging.error('Cannot have sleep_sec <= 0, this breaks the code and your computer:)')
return False
while True: while True:
# As the request is most important, we spin on that with a N ms timeout? Then fetch wathever our logparser got us, rince and repeat..
# Not the most elegant solution, but not that horrible:)
try: try:
request_query = query_queue.get(timeout=0.1) input_file = open(filename, 'r')
req = request_query.split("\n") except FileNotFoundError:
request = req[0] logging.debug('Retry opening ' + filename)
argument = req[1:] time.sleep(1)
logging.debug('Got request: ' + request) continue
logging.debug('Arguments:')
logging.debug(argument)
if datastore is None: retries = 0
logging.debug('datastore has no data at this point') line = ''
query_response.put('NA') start_stat = os.stat(filename)
continue if seek_pos is None:
cur_pos = input_file.seek(0, 0)
else:
cur_pos = input_file.seek(seek_pos, io.SEEK_SET)
if argument[0] == 'all': logging.info('Following ' + filename + ' (inode ' + str(start_stat.st_ino) + ') from pos ' + str(cur_pos))
query_response.put(datastore.loc[datastore['zone'] == request].shape[0])
else: try:
#This better contain two things; a string representing a keyword argument for timedelta while True:
#and something that can be parsed as an integer tmp = input_file.readline()
try: if tmp is not None and tmp != '':
subtract_argument = int(argument[1]) line += tmp
except ValueError: if line.endswith("\n"):
logging.fatal('Could not parse argument as int') line.rstrip()
next if line.isspace():
except IndexError: logging.debug('Empty line is empty, thank you for the newline')
pass else:
#If someone ever knows how this should be done..... logging.debug('Sending line ending at pos ' + str(input_file.tell()))
if argument[0] == 'minutes': parse_and_queue_line(line, collector_queue)
to_subtract = timedelta(minutes = subtract_argument) line = ''
elif argument[0] == 'hours': start_stat = os.stat(filename)
to_subtract = timedelta(hours = subtract_argument) cur_pos = input_file.tell()
elif argument[0] == 'days': else:
to_subtract = timedelta(days = subtract_argument) logging.debug('readline reported line with no \n?')
elif argument[0] == 'weeks':
to_subtract = timedelta(weeks = subtract_argument)
elif argument[0] == 'months':
to_subtract = timedelta(days = subtract_argument)
else: else:
logging.fatal('Was passed unknown argument: %s', argument[0]) #Using got_signal with a timeout of sleep_sec to rate limit the loopiness of this loop:)
next any_signal = got_signal(signal_queue, sleep_sec)
timestamp = datetime.now() - to_subtract if any_signal == 'Quit':
tmpstore = datastore.loc[datastore['timestamp'] > timestamp] shutdown_filefetcher(collector_queue, input_file)
query_response.put(tmpstore.loc[tmpstore['zone'] == request].shape[0]) logging.critical('Shutdown filefetcher')
return True
now_stat = os.stat(filename)
if now_stat.st_ino != start_stat.st_ino:
if now_stat.st_ctime == start_stat.st_ctime:
#Strange, inode has changed, but ctime is the same?
if now_stat.st_size >= start_stat.st_size:
logging.warning('New inode number, but same ctime? Not sure how to handle this. Reopening, but keeping seek position...')
else:
logging.warning('New inode number, same ctime, but smaller? Much confuse, starting from beginning..')
seek_pos = 0
else:
logging.debug('File have new inode number, restarting read from start')
seek_pos = 0
break
if now_stat.st_size < start_stat.st_size:
logging.debug('File is now smaller than last read, restarting from start')
seek_pos = 0
break
except KeyboardInterrupt:
shutdown_filefetcher(collector_queue, input_file)
logging.debug('KeyboardInterrupt, closing file and quitting')
return False
except FileNotFoundError:
'''Input file gone-gone during loop, retry opening a few times'''
logging.debug('File gone away')
next
except (queue.Empty, KeyboardInterrupt): def shutdown_filefetcher(output_queue, input_file):
pass '''Cleanly close filehandles, save log position and queue contents'''
try: cur_pos = input_file.tell()
#For the log parser... input_file.close()
collector_query = collector_queue.get(False) with open('position', 'w') as output_file:
tmpstore = pd.DataFrame(collector_query) logging.debug('Saving current position ' + str(cur_pos))
if datastore is None: output_file.write(str(cur_pos))
datastore = tmpstore dump_queue(output_queue)
else:
datastore = pd.concat([datastore, tmpstore], ignore_index=True)
except (queue.Empty, KeyboardInterrupt):
pass
try:
#Last, the signals..
signal_query = signal_queue.get(False)
if signal_query == 'Quit':
logging.debug('DataStore quitting')
break
except (queue.Empty, KeyboardInterrupt):
pass
def filefetcher(filename: str, collector_queue, sleep_sec=0.1): def got_signal(signal_queue: Queue, sleep_sec: float):
'''Latch onto a file, putting any new lines onto the queue''' '''Read from signal_queue with a timeout of sleep_sec,
line = '' returns either a signal name (whatever text string the queue gave us), or None'''
with open(filename, 'r') as input_file: try:
logging.debug('Following ' + str(input_file)) any_signal = signal_queue.get(timeout=sleep_sec)
#Start at the end of the file logging.critical('Got ' + any_signal)
input_file.seek(0, io.SEEK_END) return any_signal
while True: except queue.Empty:
tmp = input_file.readline() return None
if tmp is not None and tmp != '':
line += tmp def parse_and_queue_line(line: str, collector_queue):
if line.endswith("\n"): '''Parse and queue the result'''
line.rstrip() to_ret = parse_line(line)
to_ret = parse_line(line) if to_ret:
collector_queue.put(to_ret) collector_queue.put(to_ret)
line = ''
elif sleep_sec:
time.sleep(sleep_sec)
def parse_line(input_line: str) -> dict: def parse_line(input_line: str) -> dict:
'''Parse input line into (at least) the following details: '''Fetch fourth space-separated item as zone, prepend with unix timestamp from the 3 first items'''
zone: #Input line: Month DayOfMonth HH:MM:SS hostname zone....
net_dnat or loc_net #The input line is in the local timezone
SRC: #Seems like python known about our timezone, so when converting to unix timestamp from random text
source ip #it should handle changes from CET to CEST
DST: try:
destination ip input_line = input_line.split("\n")[0] #No idea why it appends a newline if I don't do this?
PROTO: space_split = input_line.split(' ')
protcol month = space_split[0]
SPT: dateint = space_split[1]
source port timestr = space_split[2]
DPT zone = space_split[4]
destination port''' now = datetime.now()
retval = {} except IndexError:
input_line = input_line.split("\n")[0] #No idea why it appends a newline if I don't do this? return None
space_split = input_line.split(' ') logline_time = datetime.strptime(str(now.year) + ' ' + month + ' ' + str(dateint) + ' ' + str(timestr), '%Y %b %d %H:%M:%S')
retval['zone'] = space_split[4] #If this is in the future, this probably means the data is from last year
for stringy in space_split: if logline_time > now:
try: #We're in the future. Prettu sure the future is not yet, we're in last year
key, val = stringy.split('=') #This might happen around first of january
retval[str(key)] = [str(val)] logline_time = logline_time.replace(year=logline_time.year - 1)
except ValueError: timestamp = int(logline_time.timestamp())
#Log entries are not perfectly symmetrical. We don't care retval = (timestamp, zone)
pass
retval['timestamp'] = datetime.now()
logging.debug('Parsed line to ' + str(retval)) logging.debug('Parsed line to ' + str(retval))
return retval return retval
def dump_queue(queue_to_dump: Queue):
'''Write the contents of a queue to a list that we pickle to a file'''
class RequestHandler(server.SimpleHTTPRequestHandler): #We use pickle, every entry in the queue is one entry in a list
'''Subclassing to change behaviour''' if queue_to_dump.empty():
def send_head(self): #Nothing to save, nothing to do
self.send_response(HTTPStatus.OK) logging.debug('Empty queue')
self.send_header("Content-type", "text/html") return
self.end_headers() out_list = []
while True:
def do_GET(self):
if self.path == '/favicon.ico':
self.send_response(HTTPStatus.NOT_FOUND);
self.end_headers()
return
self.send_head()
r = []
r.append('<!DOCTYPE HTML>')
r.append('<html lang=en>')
r.append('<head><title>Statistics</title></head>')
r.append('<style>')
r.append('table, th, td { border: 1px solid black; border-collapse: collapse; }')
r.append('td:not(:first-child) { text-align: right; }')
r.append('table { width: 90%; }')
r.append('</style>')
r.append('<body>\n')
r.extend(self.fill_body())
r.append('</body>\n')
r.append('</html>\n')
encoded = '\n'.join(r).encode('UTF-8', 'surrogateescape')
f = io.BytesIO()
f.write(encoded)
f.seek(0)
self.copyfile(f, self.wfile)
f.close()
def fetch_data(self, command: str, argument: list):
'''Communicate with datastore via queue'''
r = []
if type(argument) is str:
arg = []
arg.append(argument)
argument = arg
str_args = []
for args in argument:
str_args.append(str(args))
self.server.queue_out.put(command + "\n" + "\n".join(str_args))
try: try:
retval = self.server.queue_in.get(timeout=1) out_list.append(queue_to_dump.get_nowait())
except queue.Empty: except queue.Empty:
retval = 'query timeout' break
logging.debug('Returning: %s', retval) if out_list:
return str(retval) logging.debug('Saving ' + str(len(out_list)) + ' entries to dump.pickle')
to_save = pickle.dumps(out_list)
with open('dump.pickle', 'wb') as output_file:
bytes_written = output_file.write(to_save)
logging.debug('Saved ' + str(len(out_list)) + ' entries, taking ' + str(bytes_written) + ' bytes')
def fetch_data_for_connections(self, direction: str): def signal_handler(signum, frame):
r = [] '''Handle signals in a sensible way, I guess?'''
r.append('<tr>') if signum == signal.SIGTERM:
r.append('<td>' + direction + '</td>') logging.critical('Asked to quit')
for tofetch in (['all'], ['minutes', '1'], ['hours', '1'], ['days', '1'], ['weeks', 1], ['months', '1']): raise TimeToQuit('Received signal ' + signal.Signals(signum).name)
retval = self.fetch_data(direction, tofetch)
r.append('<td>' + str(retval) + '</td>')
r.append('</tr>')
return r
def fill_body(self): def load_pickled_file(output_queue):
r = [] '''Load queue contents from pickled queue structure'''
r.append('<h1>Statistics</h1>') #Does our dump file exist?
r.append('<table><tr><th>Direction</th><th>All</th><th>Last minute</th><th>Last hour</th><th>Last day</th><th>Last week</th><th>Last month</th></tr>') loadfile = 'dump.pickle'
for direction in ('net_dnat', 'loc-net'): if os.path.isfile(loadfile):
r.extend(self.fetch_data_for_connections(direction)) size = os.stat(loadfile).st_size
r.append('</table>') logging.debug(loadfile + ' exists, loading ' + str(size) + ' bytes.')
return r #This is already parsed lines, dump them straight into the file_parser_result_queue
#Saved format is [(timestamp, parseresult), ..]
with open(loadfile, 'rb') as input_file:
#Yes, I know, loads is unsafe:)
loaded_data = pickle.loads(input_file.read())
for entry in loaded_data:
output_queue.put(entry)
logging.debug('Put ' + str(len(loaded_data)) + ' entries on the queue')
logging.debug('Deleting old dump')
os.unlink(loadfile)
def serve_http(httpd_port: int, queue_in, queue_out): def load_start_pos(logfile):
TCPServer.allow_reuse_address = True '''Read start position from file, if it exists'''
with server.ThreadingHTTPServer(("", httpd_port), RequestHandler) as httpd: #Do we have any position we want to start from?
httpd.queue_in = queue_in if os.path.isfile('position'):
httpd.queue_out = queue_out with open('position', 'r') as input_file:
logging.info("serving at port %s", httpd_port) tmp_start_pos = input_file.readline()
httpd.serve_forever() try:
tmp_start_pos = int(tmp_start_pos)
except ValueError:
logging.error('Could not parse ' + str(tmp_start_pos) + ' as an integer')
return None
logging.debug('Loaded position ' + str(tmp_start_pos))
size = os.stat(logfile).st_size
logging.debug('log file size is ' + str(size))
if tmp_start_pos <= size:
return tmp_start_pos
return None
class RequestHandler(socketserver.BaseRequestHandler):
'''derived BaseRequestHandler'''
def handle(self):
logging.info('Connected to ' + str(self.client_address[0]))
self.request.settimeout(5)
start_time = datetime.now()
while True:
try:
if self.overflowqueue.empty != True:
tosend = self.overflowqueue.get_nowait()
if tosend:
self.send(tosend)
except queue.Empty:
pass
try:
if self.input_queue.empty != True:
event = self.input_queue.get(timeout=1)
tosend = str(event[0]) + ' ' + event[1]
try:
self.send(tosend)
except BrokenPipeError:
logging.error('Client gone')
self.overflowqueue.put(tosend)
break
try:
peer = self.request.getpeername()
except OSError as error:
logging.error('Client gone')
self.overflowqueue.put(tosend)
break
except queue.Empty:
pass
try:
signal = self.signal_queue.get_nowait()
logging.debug('Signal: ' + str(signal))
if signal == 'Quit':
logging.info('Asked to quit')
break
except queue.Empty:
pass
now_time = datetime.now()
diff_time = now_time - start_time
if diff_time.total_seconds() >= 30:
start_time = now_time
#Long time, no see, time to pingpong the client:)
if self.ping_client() != True:
break
logging.debug('Request abandoned')
def send(self, tosend):
'''Wrap sendall'''
logging.debug('Sending ' + str(tosend))
self.request.sendall(bytes(tosend + "\n", 'utf-8'))
def set_queue(self, input_queue, overflowqueue, signal_queue):
'''Set Queue for fetching events'''
self.input_queue = input_queue
self.overflowqueue = overflowqueue
self.signal_queue = signal_queue
def ping_client(self):
'''Send ping to a client, expect pong back, else close socket'''
try:
self.send('ping')
except BrokenPipeError:
#Someone closed our socket
logging.debug('Broken pipe')
return False
try:
response = self.request.recv(1024)
if response:
if response.strip() == b'pong':
logging.debug('Client said pong, all good')
return True
else:
logging.debug('No reply')
return False
except TimeoutError:
logging.debug('Timeout')
return False
except ConnectionResetError:
logging.debug('Connection reset, closing socket')
self.request.close()
return False
except OSError as error:
if str(error) == 'timed out':
#Client probably just slacks
logging.debug('Timeout')
else:
logging.error('Peer gone?: ' + str(error))
return False
def socket_server(file_parser_result_queue, overflowqueue, socket_server_signal_queue):
'''Socket server sending whatever data is in the queue to any client connecting'''
#Multiple connections here is probably a horrible idea:)
setproctitle('routerstats-collector socket_server')
host, port = '', 9999
while True:
try:
socketserver.TCPServer.allow_reuse_address = True
socketserver.TCPServer.allow_reuse_port = True
server = socketserver.TCPServer((host, port), RequestHandler)
server.timeout = 1
with server:
server.RequestHandlerClass.set_queue(server.RequestHandlerClass, file_parser_result_queue, overflowqueue, socket_server_signal_queue)
logging.info('Socket up at ' + host + ':' + str(port))
while True:
try:
logging.debug('Waiting for request')
server.handle_request()
except KeyboardInterrupt:
logging.debug('Received KeyboardInterrupt')
try:
server.server_close()
except Exception as e:
logging.exception(e)
return
except ValueError:
#This seems to happen whenever the socket is closed somewhere else, but handle_request still runs
try:
server.server_close()
except Exception as e:
logging.exception(e)
break
try:
recvd_signal = socket_server_signal_queue.get_nowait()
if recvd_signal == 'Quit':
logging.debug('Received Quit')
server.server_close()
return
except queue.Empty:
pass
except OSError as error:
logging.info('Waiting for Address to become available: ' + str(error))
time.sleep(1)
continue
def main(): def main():
'''Main thingy''' '''Main thingy'''
httpd_port = 8000 setproctitle('routerstats-collector main-thread')
file_to_follow = '/var/log/ulog/syslogemu.log' logging.debug('Starting as PID ' + str(os.getpid()))
query_queue = Queue() file_to_follow = 'syslogemu.log'
query_response = Queue() #Just quit early if file is missing..
collector_queue = Queue() if os.path.isfile(file_to_follow) is False:
signal_queue = Queue() logging.error('Could not find file ' + file_to_follow)
sys.exit()
file_parser_result_queue = Queue()
file_parser_signal_queue = Queue()
overflowqueue = Queue()
socket_server_signal_queue = Queue()
started_processes = [] started_processes = []
dead_processes = 0
datastore_process = Process(target=datathingy, args=(query_queue, query_response, collector_queue, signal_queue)) load_pickled_file(file_parser_result_queue)
datastore_process.start() start_pos = load_start_pos(file_to_follow)
started_processes.append(datastore_process)
collector_process = Process(target=filefetcher, args=(file_to_follow, collector_queue)) file_parser_process = Process(target=filefetcher, daemon=True, args=(file_to_follow, file_parser_result_queue, file_parser_signal_queue, 0.5, start_pos))
collector_process.start() file_parser_process.start()
started_processes.append(collector_process) logging.debug('Started filefetcher as pid ' + str(file_parser_process.pid))
started_processes.append((file_parser_process, file_parser_signal_queue))
http_process = Process(target=serve_http, args=(httpd_port, query_response, query_queue)) #We're not writing directly to an rrd,
http_process.start() #This goes out to a socket
started_processes.append(http_process) #As soon as a client connects to the socket, we send what we have in queue
#Client can do whatever it wants with this
#This means any "malicious" connections will wipe the history
#We're fine with this
socket_server_process = Process(target=socket_server, daemon=True, args=(file_parser_result_queue, overflowqueue, socket_server_signal_queue))
socket_server_process.start()
logging.debug('Socket server started as pid ' + str(socket_server_process.pid))
started_processes.append((socket_server_process, socket_server_signal_queue))
#rrd_stuffer_process = Process(target=rrd_stuffer, args(file_parser_result_queue, rrd_stuffer_signal_queue))
#rrd_stuffer_process.start()
#started_prcesses.append(rrd_stuffer_process)
#signal_queues.append(rrd_stuffer_signal_queue)
signal.signal(signal.SIGTERM, signal_handler) #Make sure subthreads get the info:)
#No idea how to manage this better? #No idea how to manage this better?
while True: while True:
try: try:
for p in started_processes: for p in started_processes:
if p.is_alive(): if p[0].is_alive():
pass pass
else:
logging.error(p[0].name + ' has died prematurely?')
p[0].join()
dead_processes += 1
if dead_processes >= len(started_processes):
logging.error('All processes has gone away :/')
sys.exit()
time.sleep(0.1) time.sleep(0.1)
except KeyboardInterrupt: except (KeyboardInterrupt, TimeToQuit):
for p in started_processes: for p in started_processes:
if p.is_alive(): if p[0].is_alive():
retval = p.join(timeout=1) p[1].put('Quit')
if retval is None: p[0].join(timeout=5)
p.kill() if p[0].is_alive():
logging.error('Timeout waiting for shutdown, killing child PID: ' + str(p[0].pid))
p[0].kill()
break break
if __name__ == '__main__': if __name__ == '__main__':