Compare commits

..

7 Commits

30 changed files with 926 additions and 471 deletions

View File

@ -9,6 +9,9 @@ verify_ssl = true
keyboard = "*" keyboard = "*"
beautifulsoup4 = "*" beautifulsoup4 = "*"
pytz = "*" pytz = "*"
flask = "*"
nltk = "*"
spacy = "*"
[requires] [requires]
python_version = "3.9" python_version = "3.8"

19
TODO.md
View File

@ -1,26 +1,33 @@
# TODO # TODO
## 1. Main modulo ## Main modulo
1. Workers para 1. Workers para
1. [x] Revisar Email 1. [x] Revisar Email
1. [ ] Revisar WhatsApp 1. [ ] Revisar WhatsApp
1. [ ] Procesar Texto 1. [ ] Procesar Texto
## 2. Modulo de Revision de Email ## Modulo de Revision de Email
Para revisar si hay mails nuevos, validando que sea de las personas registradas. Para revisar si hay mails nuevos, validando que sea de las personas registradas.
Si no está registrada se avisa a administrador para saber que hacer. Si no está registrada se avisa a administrador para saber que hacer.
Limpieza de Inbox y Spam. Limpieza de Inbox y Spam.
- [x] Revisar lista de emails - [x] Revisar lista de emails
- [ ] Revisar si fue revisado - [x] Revisar si fue revisado
- [x] Revisar si corresponde a un "Jefe" - [x] Revisar si corresponde a un "Jefe"
- [x] Confirmar con cerebro si es un comando
## 3. Modulo de WhatsApp ## Modulo de WhatsApp
Respuestas a mensajes. Respuestas a mensajes.
## 4. Modulo de Recordatorio ## AI para procesar textos
Spacy permite procesar texto
Hay que poder "educar" el modelo
Hay que poder agregar datos
Hay que poder agregar los correos y mensajes que no se conocen a listado "desconocido"
## Modulo de Recordatorio
Crear recordatorios y mandar correo o WhatsApp en el momento del recordatorio. Crear recordatorios y mandar correo o WhatsApp en el momento del recordatorio.
## 5. Modulo de Registro de Eventos ## Modulo de Registro de Eventos
Ir llevando un registro de eventos. Ir llevando un registro de eventos.
### Otros ### Otros

View File

@ -3,77 +3,80 @@ import datetime
import locale import locale
from threading import Thread from threading import Thread
import queue import queue
from src.functions import dump_queue
def get_today(tz):
today = datetime.datetime.now(tz=tz)
locale.setlocale(locale.LC_TIME, 'es_ES')
return today
class Logger: class Logger:
""" """
Clase que lleva el diario de actividades de la secretaria Clase que lleva el diario de actividades de la secretaria
""" """
def __init__(self, log_folder, timezone): def __init__(self, log_folder, timezone, name):
self.folder = log_folder self.folder = log_folder
self.tz = timezone self.tz = timezone
self.name = name
self.messages = [] self.messages = []
def load_last(self):
files = [f for f in os.listdir(self.folder) if os.path.isfile(os.path.join(self.folder, f))]
today = datetime.datetime.now(tz=self.tz)
if len(files) == 0:
self.messages = []
return
last = files[-1]
self.messages = []
with open(os.path.join(self.folder, last), 'r') as f:
self.messages.append(f.readline())
def get_filename(self): def get_filename(self):
files = [f for f in os.listdir(self.folder) if os.path.isfile(os.path.join(self.folder, f)) and 'dairy' in f] files = [f for f in os.listdir(self.folder) if os.path.isfile(os.path.join(self.folder, f)) and 'diary' in f]
today = datetime.datetime.now(tz=self.tz) today = datetime.datetime.now(tz=self.tz)
if len(files) == 0: if len(files) == 0:
return os.path.join(self.folder, 'diary-{0}.log'.format(today.strftime('%Y-%m-%d'))) return os.path.join(self.folder, self.name + '-diary-{0}.log'.format(today.strftime('%Y-%m-%d')))
last = files[-1] last = files[-1]
return os.path.join(self.folder, last) return os.path.join(self.folder, last)
def start_new(self): def start_new(self):
today = datetime.datetime.now(tz=self.tz) today = datetime.datetime.now(tz=self.tz)
filename = os.path.join(self.folder, 'diary-{0}.log'.format(today.strftime('%Y-%m-%d'))) filename = os.path.join(self.folder, self.name + '-diary-{0}.log'.format(today.strftime('%Y-%m-%d')))
with open(filename, 'w') as f: with open(filename, 'w') as f:
pass pass
def start_log(self): def start_log(self):
today = datetime.datetime.now(tz=self.tz) today = datetime.datetime.now(tz=self.tz)
locale.setlocale(locale.LC_TIME, 'es_ES') locale.setlocale(locale.LC_TIME, 'es_ES')
msg = 'Inicio un nuevo dia siendo las {0} del {1}'.format(today.strftime('%H:%M:%S'), msg = 'Inicio una nueva jornada siendo las {0} del {1}'.format(today.strftime('%H:%M:%S'),
today.strftime('%d de %B de %Y')) today.strftime('%d de %B de %Y'))
self.log(msg) self.log(msg)
def stop_log(self): def stop_log(self):
now = datetime.datetime.now(tz=self.tz) now = datetime.datetime.now(tz=self.tz)
locale.setlocale(locale.LC_TIME, 'es_ES') locale.setlocale(locale.LC_TIME, 'es_ES')
msg = 'Siendo las {0}, termino mis registros por hoy'.format(now.strftime('%H:%M:%S')) msg = 'Siendo las {0}, termino mis registros por esta jornada'.format(now.strftime('%H:%M:%S'))
self.log(msg) self.log(msg)
msg = '--------' msg = '--------'
self.log(msg) self.log(msg)
def log_action(self, action): def log_action(self, action):
today = datetime.datetime.now(tz=self.tz) msg = 'he realizado {0}'.format(action)
locale.setlocale(locale.LC_TIME, 'es_ES') self.log_msg(msg)
msg = 'A las {0} del {1}, he realizado {2}'.format(today.strftime('%H:%M:%S'),
today.strftime('%d de %B de %Y'), action)
self.log(msg)
def log_not_action(self, action): def log_not_action(self, action):
today = datetime.datetime.now(tz=self.tz) msg = 'no he podido realizar {0}'.format(action)
locale.setlocale(locale.LC_TIME, 'es_ES') self.log_msg(msg)
msg = 'A las {0} del {1}, no he podido realizar {2}'.format(today.strftime('%H:%M:%S'),
today.strftime('%d de %B de %Y'), action) def start_turn(self, action):
self.log(msg) self.log_msg('Inicio de turno de {0}'.format(action))
def end_turn(self, action):
self.log_msg('Termino de turno de {0}'.format(action))
def log_msg(self, msg):
today = get_today(self.tz)
line = 'A las {0} del {1}, {2}'.format(today.strftime('%H:%M:%S'), today.strftime('%d de %B de %Y'), msg)
self.log(line)
def log(self, message): def log(self, message):
line = message.rstrip('.') + '.' line = message
if line[-1] != '.' and line != '--------':
line = line.rstrip('.') + '.'
self.messages.append(line) self.messages.append(line)
if len(self.messages) > 1000: if len(self.messages) > 1000:
self.start_new() self.start_new()
self.load_last()
with open(self.get_filename(), 'a') as f: with open(self.get_filename(), 'a') as f:
f.write(line + "\n") f.write(line + "\n")
@ -81,28 +84,40 @@ class Logger:
class Worker(Thread): class Worker(Thread):
def __init__(self, params, configs): def __init__(self, params, configs):
super().__init__() super().__init__()
self.event = params['events']['stop'] self.event = params['events']['log_stop']
self.queue = params['queues']['log'] self.queue = params['queues']['log']
self.wait = configs.get('supervisor.wait') self.wait = configs.get('supervisor.wait')
self.logger = Logger(params['folders']['log'], configs.get('timezone')) self.logger = Logger(params['folders']['log'], configs.get('timezone'), params['logger']['name'])
self.logging = params['logging'] self.logging = params['logging']
def parse_message(self, message):
if 'is_start' in message and message['is_start']:
self.logger.start_log()
return
if 'action' in message:
if 'not' in message and message['not']:
self.logger.log_not_action(message['action'])
return
self.logger.log_action(message['action'])
return
if 'start_turn' in message:
self.logger.start_turn(message['start_turn'])
return
if 'end_turn' in message:
self.logger.end_turn(message['end_turn'])
return
self.logger.log_msg(message['message'])
def run(self): def run(self):
self.logging.log('Starting', caller=type(self)) self.logging.log('Starting', caller=type(self))
self.queue.put({'is_start': True})
while not self.event.is_set(): while not self.event.is_set():
self.logging.log('Looping status {0}'.format(not self.event.is_set()), caller=type(self))
try: try:
message = self.queue.get(timeout=self.wait) message = self.queue.get(timeout=self.wait)
self.logging.log('Logger received message', caller=type(self)) self.logging.log('Logger received message', caller=type(self))
if 'is_start' in message and message['is_start']: self.parse_message(message)
self.logger.start_log()
continue
if 'not' in message and message['not']:
self.logger.log_not_action(message['action'])
continue
self.logger.log_action(message['action'])
except queue.Empty: except queue.Empty:
pass continue
[self.parse_message(message) for message in dump_queue(self.queue, self.wait)]
self.logger.stop_log() self.logger.stop_log()
self.logging.log('Exiting', caller=type(self)) self.logging.log('Exiting', caller=type(self))
return

View File

@ -6,9 +6,10 @@ class Logging:
""" """
Clase que registra los eventos del flujo del programa en archivos Clase que registra los eventos del flujo del programa en archivos
""" """
def __init__(self, timezone, folder=None): def __init__(self, timezone, folder=None, name=''):
self.tz = timezone self.tz = timezone
self.folder = folder self.folder = folder
self.name = name
self.filename = self.get_filename() self.filename = self.get_filename()
self.log('------') self.log('------')
@ -19,7 +20,7 @@ class Logging:
if os.path.getsize(os.path.join(self.folder, filename)) < 1024 * 1024: if os.path.getsize(os.path.join(self.folder, filename)) < 1024 * 1024:
return filename return filename
today = datetime.datetime.now(tz=self.tz) today = datetime.datetime.now(tz=self.tz)
filename = 'logging-{0}.log'.format(today.strftime('%Y-%m-%d')) filename = '-'.join([self.name, 'logging-{0}.log'.format(today.strftime('%Y-%m-%d'))]).strip('-')
base_name = filename base_name = filename
n = 1 n = 1
while os.path.isfile(os.path.join(self.folder, filename)): while os.path.isfile(os.path.join(self.folder, filename)):

View File

@ -1,7 +1,24 @@
{ {
"imap_server": "imap.yandex.com", "imap": {
"port": 993, "server": "imap.yandex.com",
"username": "secretary@incoviba.cl", "port": 993,
"password": "quzshqzyfcnydevp", "user": {
"ssl": true "name": "secretary@incoviba.cl",
"password": "vgvjuwzwizktdpka"
},
"ssl": true
},
"smtp": {
"server": "smtp.yandex.com",
"port": 495,
"user": {
"name": "secretary@incoviba.cl",
"password": "vgvjuwzwizktdpka"
},
"ssl": true
},
"max": 5,
"consultas": "email_consultas.json",
"spam": "email_spam.json",
"revisados": "email_revisados.json"
} }

View File

@ -1,3 +1,4 @@
{ {
"wait": 15 "wait": 15,
"timezone": "America/Santiago"
} }

48
entry/api.py Normal file
View File

@ -0,0 +1,48 @@
from flask import Flask, redirect, url_for
import os
from src.instrucciones import Instrucciones
import json
from src.brain.build_data import brain_app
app = Flask(__name__)
app.register_blueprint(brain_app, url_prefix='/brain')
data_folder = os.path.join(os.path.realpath('..'), 'data')
@app.route('/', methods=['GET'])
def index():
return {
'api': {
'entrypoints': {
'bosses': [
'add',
'/'
],
'instructions': [
'add',
'/'
],
'email': []
}
}
}
@app.route('/instructions/', methods=['GET'])
def instructions():
instrucciones = Instrucciones(data_folder)
data = {'Instrucciones': [{'Name': i.instruccion, 'Aliases': i.aliases} for i in instrucciones.instrucciones]}
return json.dumps(data)
@app.route('/instructions/add/<string:instruccion>/<string:alias>')
def add_instruccion(instruccion, alias):
ins = Instrucciones(data_folder)
ins.add(instruccion, [alias])
ins.save()
return redirect(url_for('instructions'))
if __name__ == '__main__':
app.run(port=8081, debug=True)

View File

@ -1,53 +1,5 @@
import argparse import argparse
import imaplib from src.email.functions import connect, check_inbox
import email
# Email class that fetches emails from server by uid and can parse body for secretary, can also delete email by uid
class Email:
def __init__(self, uid):
self.uid = uid
self.message = ''
def get(self, imap):
status, raw_data = imap.uid('fetch', self.uid, '(RFC822)')
if status != 'OK':
raise Exception('Could not recover message {0}'.format(self.uid))
self.message = email.message_from_bytes(raw_data[0][1])
def delete(self, imap):
status, result = imap.uid('STORE', self.uid, '+FLAGS', '(\\Deleted)')
if status != 'OK':
raise Exception('Could not flag message {0}'.format(self.uid))
def connect(imap_url, port, username, password, ssl=False):
if ssl:
server = imaplib.IMAP4_SSL(imap_url, port=port)
else:
server = imaplib.IMAP4(imap_url, port=port)
server.login(username, password)
return server
def check_inbox(server):
status, msg = server.select('INBOX')
if status != 'OK':
return None
status, ids = server.uid('search', None, 'All')
if status != 'OK':
return None
ids = ids[0].decode().split()
emails = []
for mid in ids:
em = Email(mid)
em.get(server)
emails.append(em)
return emails
def main(args): def main(args):

View File

@ -1,44 +0,0 @@
import os
from threading import Thread
import queue
import json
class QuestionWorker(Thread):
def __init__(self, params, configs):
super(QuestionWorker, self).__init__()
self.queue = params['queues']['questions']
self.logging = params['logging']
self.logger = params['queues']['log']
self.event = params['events']['stop']
self.wait = configs.get('supervisor.wait')
self.folder = params['folders']['data']
def question(self, text):
filename = os.path.join(self.folder, 'questions.json')
with open(filename, 'w+', encoding='utf8') as f:
try:
data = json.load(f)
except json.decoder.JSONDecodeError:
data = []
if text in data:
return
data.append(text)
json.dump(data, f, ensure_ascii=False, indent=4)
self.logging.log(text, type(self))
def run(self):
self.logging.log('Starting', type(self))
self.logger.put({'action': type(self)})
while not self.event.is_set():
try:
question = self.queue.get(timeout=self.wait)
if question.type == 'email':
self.question(
'¿Que hago con este correo de {0} y el texto dice {1} en la fecha {2}?'.
format(question.sender, question.text, question.datetime.strftime('%d-%m-%Y %H:%M:%S'))
)
except queue.Empty:
pass
self.logging.log('Exiting', type(self))
return

View File

@ -12,8 +12,8 @@ class Boss:
class Bosses: class Bosses:
def __init__(self, data_folder): def __init__(self, data_folder):
filename = os.path.join(data_folder, 'bosses.json') self.filename = os.path.join(data_folder, 'bosses.json')
with open(filename, 'r') as f: with open(self.filename, 'r') as f:
data = json.load(f) data = json.load(f)
self.bosses = [] self.bosses = []
addrs = AddressBook(data_folder) addrs = AddressBook(data_folder)
@ -35,3 +35,29 @@ class Bosses:
if a in name: if a in name:
return True return True
return False return False
def add_boss(self, name, aliases: list = None):
if self.is_boss(name):
return
b = Boss()
b.full_name = name
if aliases is not None:
b.aliases = aliases
def get(self, name):
if not self.is_boss(name):
return None
for i, boss in enumerate(self.bosses):
if boss.full_name in name:
return i
for m in boss.contact.emails:
if m in name:
return i
for a in boss.aliases:
if a in name:
return i
def save(self):
data = [{'full_name': boss.full_name, 'aliases': boss.aliases} for boss in self.bosses]
with open(self.filename, 'w') as f:
json.dump(data, f, indent=4)

3
src/brain/aiml.py Normal file
View File

@ -0,0 +1,3 @@
class Aiml:
def __init__(self):
pass

29
src/brain/brain.py Normal file
View File

@ -0,0 +1,29 @@
import os
import spacy
from src.instrucciones import Instrucciones
class Brain:
def __init__(self, data_folder):
self.folder = data_folder
self.filename = os.path.join(data_folder, 'brain.json')
self.nlp = None
self.load_nlp(data_folder)
def load_nlp(self, data_folder):
folder = os.path.join(data_folder, 'model')
self.nlp = spacy.load(folder)
def save_model(self):
folder = os.path.join(self.folder, 'model')
self.nlp.to_disk(folder)
def get_command(self, phrase):
doc = self.nlp(phrase)
command = max(doc.cats, key=doc.cats.get)
return command
def get_response(self, command, phrase):
doc = self.nlp(phrase)
return doc

20
src/brain/build_data.py Normal file
View File

@ -0,0 +1,20 @@
import argparse
import os
from flask import Blueprint, request
import json
brain_app = Blueprint('brain_blueprint', __name__)
@brain_app.route('/', methods=['GET'])
def index():
return {
'api': '/brain'
}
@brain_app.route('/training/data/add', methods=['POST'])
def add_data():
input_data = json.loads(request.data)
return input_data

33
src/brain/build_model.py Normal file
View File

@ -0,0 +1,33 @@
import argparse
import os
import spacy
from src.instrucciones import Instrucciones
def load_model(commands):
nlp = spacy.load('es_core_news_sm')
if 'textcat' not in nlp.pipe_names:
textcat = nlp.create_pipe('textcat')
nlp.add_pipe(textcat)
textcat.add_label('test')
for c in commands.instrucciones:
textcat.add_label(c.instruccion)
return nlp
def save_model(data_folder, model):
folder = os.path.join(data_folder, 'model')
model.to_disk(folder)
def main(args):
commands = Instrucciones(args.data_folder)
model = load_model(commands)
save_model(args.data_folder, model)
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--data_folder')
_args = parser.parse_args()
main(_args)

33
src/brain/train_model.py Normal file
View File

@ -0,0 +1,33 @@
import argparse
import os
import spacy
import json
def load_model(folder):
return spacy.load(folder)
def get_data(folder):
files = [f for f in os.listdir(folder) if os.path.isfile(os.path.join(folder, f))]
data = []
for filename in files:
with open(filename, 'r') as f:
data += json.load(f)
return data
def train_model(model, data):
optimizer = model.begin_training()
pass
def main(args):
pass
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('-f', '--data_folder')
_args = parser.parse_args()
main(_args)

View File

@ -0,0 +1 @@
from .message import Message

View File

@ -1,8 +1,9 @@
class Message: class Message:
def __init__(self, mtype, sender, datetime, text, original): def __init__(self, mtype, sender, datetime, text, original, subject):
self.type = mtype self.type = mtype
self.sender = sender self.sender = sender
self.datetime = datetime self.datetime = datetime
self.text = text self.text = text
self.checked = False self.checked = False
self.original = original self.original = original
self.subject = subject

View File

@ -1,4 +0,0 @@
from .email_interpreter import EmailInterpreter
from .revisor_worker import RevisorEmailWorker
from .obtenedor import Obtenedor
from .validador import Validador

19
src/email/definitions.py Normal file
View File

@ -0,0 +1,19 @@
from email.parser import BytesParser
from email.policy import default as DefaultPolicy
class Email:
def __init__(self, uid):
self.uid = uid
self.message = ''
def get(self, imap):
status, raw_data = imap.uid('fetch', self.uid, '(RFC822)')
if status != 'OK':
raise Exception('Could not recover message {0}'.format(self.uid))
self.message = BytesParser(policy=DefaultPolicy).parsebytes(text=raw_data[0][1])
def delete(self, imap):
status, result = imap.uid('STORE', self.uid, '+FLAGS', '(\\Deleted)')
if status != 'OK':
raise Exception('Could not flag message {0}'.format(self.uid))

View File

@ -1,10 +0,0 @@
from threading import Thread
class EmailInterpreter(Thread):
def __init__(self, configs, params):
super().__init__()
self.logging = params['logging']
def run(self):
self.logging.log('Starting', type(self))

39
src/email/functions.py Normal file
View File

@ -0,0 +1,39 @@
import keyboard
import imaplib
from src.email.definitions import Email
def exit_thread(stop, logger):
logger.log('Starting exit thread', caller='exit_thread')
keyboard.wait('Esc')
logger.log('Escape pressed', caller='exit_thread')
stop.set()
logger.log('Exit signal sent', caller='exit_thread')
def connect(imap_url, port, username, password, ssl=False):
if ssl:
server = imaplib.IMAP4_SSL(imap_url, port=port)
else:
server = imaplib.IMAP4(imap_url, port=port)
server.login(username, password)
return server
def check_inbox(server):
status, msg = server.select('INBOX')
if status != 'OK':
return None
status, ids = server.uid('search', None, 'All')
if status != 'OK':
return None
ids = ids[0].decode().split()
emails = []
for mid in ids:
em = Email(mid)
em.get(server)
emails.append(em)
return emails

View File

@ -1,117 +1,16 @@
from multiprocessing import Process
from queue import Queue
from threading import Thread, Event, Lock
import argparse import argparse
import os import os
from common.helper.logging import Logging from common.helper.logging import Logging
from setup.config import load_config from setup.config import load_config
import pytz import pytz
from src.bosses import Bosses from src.bosses import Bosses
import keyboard from src.instrucciones import Instrucciones
import time from src.email.supervisor import Email
from src.brain.brain import Brain
class Email(Process): def set_params(args, configs):
def __init__(self, configs, params, setup): log_name = 'email'
super(Email, self).__init__()
self.configs = configs
self.params = params
self.registry = {}
self.workers = []
self.working = []
self.worker_status = []
self.add_event('stop')
def setup(self, data):
for (m, n) in data['workers']:
self.register_worker(m, n)
for q in data['queues']:
self.add_queue(q)
for e in data['events']:
self.add_event(e)
for l in data['locks']:
self.add_lock(l)
def register_worker(self, module, name):
if module not in self.registry:
self.registry[module] = []
self.registry[module].append(name)
def add_worker(self, worker):
self.workers.append(worker)
def start_worker(self, module, name):
worker = getattr(module, name)
self.add_worker(worker)
self.working.append((module, name))
worker.start()
self.worker_status.append(True)
def start_workers(self):
for module, workers in self.registry:
for name in workers:
self.start_worker(module, name)
def check_workers(self):
stopped = 0
for (k, w) in enumerate(self.workers):
if not self.worker_status[k]:
stopped += 1
continue
if not w.is_alive():
self.params['logging'].log('Worker {0} stopped'.format(type(w)))
self.params['queues']['log'].put({'not': True, 'action': type(w)})
stopped += 1
self.worker_status[k] = False
(m, n) = self.working[k]
# Restart worker
self.start_worker(m, n)
if stopped == len(self.workers):
return False
return True
def join_workers(self):
[w.join(self.configs.get('supervisor.wait')) for w in self.workers if w.is_alive()]
def add_queue(self, name):
if 'queues' not in self.params or self.params['queues'] is None:
self.params['queues'] = {}
self.params['queues'][name] = Queue()
def add_event(self, name):
if 'events' not in self.params or self.params['events'] is None:
self.params['events'] = {}
self.params['events'][name] = Event()
def add_lock(self, name):
if 'locks' not in self.params or self.params['locks'] is None:
self.params['locks'] = {}
self.params['locks'][name] = Lock()
def run(self) -> None:
self.start_workers()
self.add_worker(Thread(target=exit_thread, args=(self.params['events']['stop'], self.params['logging'])))
while not self.params['events']['stop'].is_set():
if not self.check_workers():
break
time.sleep(self.configs.get('supervisor.wait'))
self.join_workers()
def exit_thread(stop, logger):
logger.log('Starting exit thread', caller='exit_thread')
keyboard.wait('Esc')
logger.log('Escape pressed', caller='exit_thread')
stop.set()
logger.log('Exit signal sent', caller='exit_thread')
def main(args):
configs = load_config(args.config_folder)
configs.set('timezone', pytz.timezone('America/Santiago'))
params = { params = {
'folders': { 'folders': {
'config': args.config_folder, 'config': args.config_folder,
@ -119,21 +18,46 @@ def main(args):
'data': args.data_folder 'data': args.data_folder
}, },
'bosses': Bosses(args.data_folder), 'bosses': Bosses(args.data_folder),
'logging': Logging(configs.get('timezone'), args.log_folder) 'instrucciones': Instrucciones(args.data_folder),
'brain': Brain(args.data_folder),
'logging': Logging(configs.get('timezone'), args.log_folder, log_name),
'logger': {
'name': log_name
},
'filenames': {
'consultas': os.path.join(args.data_folder, configs.get('email.consultas')),
'spam': os.path.join(args.data_folder, configs.get('email.spam')),
'revisados': os.path.join(args.data_folder, configs.get('email.revisados'))
}
} }
return params
setup = {
def set_setup():
return {
'workers': [ 'workers': [
('common.helper.logger', 'Worker'), ('common.helper.logger', 'Worker'),
('src.email', 'Obtenedor'), ('src.email.workers', 'Obtenedor'),
('src.email', 'Validador') ('src.email.workers', 'Validador'),
('src.email.workers', 'Consultador'),
('src.email.workers', 'Borrador'),
('src.email.workers', 'Procesador')
], ],
'queues': ['log', 'emails', 'valid', 'invalid'], 'queues': ['log', 'emails', 'valid', 'invalid', 'borrar'],
'events': [], 'events': [],
'locks': [] 'locks': []
} }
email = Email(configs, params, setup)
def main(args):
configs = load_config(args.config_folder)
configs.set('timezone', pytz.timezone(configs.get('supervisor.timezone')))
params = set_params(args, configs)
setup = set_setup()
email = Email(configs=configs, params=params, setup=setup)
email.start() email.start()
email.join() email.join()

View File

@ -1,78 +0,0 @@
from src.worker import Worker
from types import SimpleNamespace
from entry.email.inbox import connect, check_inbox
import re
from bs4 import BeautifulSoup
import email.utils
from src.communication.message import Message
import time
class Obtenedor(Worker):
"""
Trabajador que obtiene la lista de correos
"""
def __init__(self, configs, params):
super(Obtenedor, self).__init__(configs, params)
self.url = configs.get('email.server')
self.port = configs.get('email.port')
user = {'user': '', 'password': ''}
self.user = SimpleNamespace(**user)
self.user.name = configs.get('email.user.name')
self.user.password = configs.get('email.user.password')
self.ssl = configs.get('email.ssl')
self.revisados = []
self.queue = params['queues']['emails']
self.frec = configs.get('supervisor.wait')
def is_revisado(self, uid):
return uid in self.revisados
def add_revisado(self, uid):
if self.is_revisado(uid):
return
self.revisados.append(uid)
def build_message(self, email_part):
output = []
if email_part.is_multipart():
for part in email_part.get_payload():
output.append(self.build_message(part))
else:
html = email_part.get_payload(decode=True)
bs = BeautifulSoup(html, 'html.parser')
if bs.body:
html = bs.body.get_text()
else:
html = bs.get_text()
html = re.sub(' +', ' ', re.sub("\n+", ' ', html)).strip(' ')
output.append(html)
return output
def run(self) -> None:
self.logger.log('Starting', type(self))
self.diary.put({'action': 'Inicio de jornada de Obtenedor'})
while not self.stop.is_set():
e = 0
with connect(self.url, self.port, self.user.name, self.user.password, self.ssl) as imap:
self.logger.log('Getting emails', type(self))
emails = check_inbox(imap)
if emails is None:
continue
for em in emails:
if self.is_revisado(em.uid):
continue
sender = em.message['from']
text = ' '.join([em.message['subject'] + '.'] + self.build_message(em.message))
msg = Message('email', text=text, original=em, sender=sender,
datetime=email.utils.parsedate_to_datetime(em.message['Date']))
self.queue.put(msg)
self.add_revisado(em.uid)
e += 1
self.diary.put({'action': 'Obtenidos {0} correos nuevos'.format(e)})
time.sleep(self.frec)
self.logger.log('Exiting', type(self))
self.diary.put({'action': 'Terminando el turno de Obtenedor'})

View File

@ -1,90 +0,0 @@
from threading import Thread
import re
import time
import email.utils
from bs4 import BeautifulSoup
from entry.email.inbox import connect, check_inbox
from src.communication.message import Message
class RevisorEmailWorker(Thread):
def __init__(self, configs, params):
super().__init__()
self._url = configs.get('email.imap_server')
self._port = configs.get('email.port')
self._username = configs.get('email.username')
self._password = configs.get('email.password')
self._ssl = configs.get('email.ssl')
self.queue = params['queues']['emails']
self.questions = params['queues']['questions']
self.event = params['events']['stop']
self._wait = configs.get('supervisor.wait')
self._bosses = params['bosses']
self._logger = params['queues']['log']
self.logging = params['logging']
self.revisados = []
def revisado(self, uid):
if not self.check_revisado(uid):
self.revisados.append(uid)
def check_revisado(self, uid):
if uid in self.revisados:
return True
return False
def run(self):
self.logging.log('Starting', type(self))
self._logger.put({'action': 'Inicio jornada trabajador Revisor Email'})
while not self.event.is_set():
self.logging.log('Looping status {0}'.format(not self.event.is_set()), type(self))
self.logging.log('Connecting to Email Server', type(self))
imap = connect(imap_url=self._url, port=self._port, username=self._username, password=self._password,
ssl=self._ssl)
self.logging.log('Getting emails', type(self))
emails = check_inbox(imap)
if emails is not None:
c = 0
p = 0
for em in emails:
if self.check_revisado(em.uid):
continue
sender = em.message['from']
text = ' '.join([em.message['subject'] + '.'] + self.build_message(em.message))
msg = Message('email', text=text, original=em, sender=sender,
datetime=email.utils.parsedate_to_datetime(em.message['Date']))
if not self._bosses.is_boss(sender):
self.logging.log('Sender {0} is not a boss'.format(sender), type(self))
self.revisado(em.uid)
self.questions.put(msg)
p += 1
continue
self.queue.put(msg)
self.revisado(em.uid)
c += 1
self.logging.log('{0} emails checked'.format(c), type(self))
if c > 0:
self._logger.put({'action': 'Revise {0} nuevos correos'.format(c)})
self.logging.log('{0} emails pending'.format(p), type(self))
if p > 0:
self._logger.put({'action': 'Tengo dudas en {0} correos'.format(p)})
imap.close()
time.sleep(self._wait)
self.logging.log('Exiting', type(self))
return
def build_message(self, email_part):
output = []
if email_part.is_multipart():
for part in email_part.get_payload():
output.append(self.build_message(part))
else:
html = email_part.get_payload(decode=True)
bs = BeautifulSoup(html, 'html.parser')
if bs.body:
html = bs.body.get_text()
else:
html = bs.get_text()
html = re.sub(' +', ' ', re.sub("\n+", ' ', html)).strip(' ')
output.append(html)
return output

107
src/email/supervisor.py Normal file
View File

@ -0,0 +1,107 @@
from threading import Thread, Event, Lock
from queue import Queue
import importlib
from src.functions import exit_thread
import time
class Email(Thread):
"""
Email module supervisor thread
"""
def __init__(self, configs, params, setup):
super(Email, self).__init__()
self.configs = configs
self.params = params
self.registry = {}
self.workers = []
self.working = []
self.worker_status = []
self.add_event('stop')
self.add_event('log_stop')
self.setup(setup)
def setup(self, data):
for (m, n) in data['workers']:
self.register_worker(m, n)
for q in data['queues']:
self.add_queue(q)
for e in data['events']:
self.add_event(e)
for lo in data['locks']:
self.add_lock(lo)
def register_worker(self, module, name):
if module not in self.registry:
self.registry[module] = []
self.registry[module].append(name)
def add_worker(self, worker):
self.workers.append(worker)
def start_worker(self, module, name):
worker = getattr(module, name)
worker = worker(configs=self.configs, params=self.params)
self.add_worker(worker)
self.working.append((module, name))
worker.start()
self.worker_status.append(True)
def start_workers(self):
for module_name, workers in self.registry.items():
module = importlib.import_module(module_name)
for name in workers:
self.start_worker(module, name)
def check_workers(self):
stopped = 0
for (k, w) in enumerate(self.workers):
if not self.worker_status[k]:
stopped += 1
continue
if not w.is_alive():
self.params['logging'].log('Worker {0} stopped'.format(type(w)))
self.params['queues']['log'].put({'not': True, 'action': type(w)})
stopped += 1
self.worker_status[k] = False
(m, n) = self.working[k]
# Restart worker
self.start_worker(m, n)
if stopped == len(self.workers):
return False
return True
def join_workers(self):
[w.join(self.configs.get('supervisor.wait')) for w in self.workers if w.is_alive()]
def add_queue(self, name):
if 'queues' not in self.params or self.params['queues'] is None:
self.params['queues'] = {}
self.params['queues'][name] = Queue()
def add_event(self, name):
if 'events' not in self.params or self.params['events'] is None:
self.params['events'] = {}
self.params['events'][name] = Event()
def add_lock(self, name):
if 'locks' not in self.params or self.params['locks'] is None:
self.params['locks'] = {}
self.params['locks'][name] = Lock()
def run(self) -> None:
self.start_workers()
worker = Thread(target=exit_thread, args=(self.params['events']['stop'], self.params['logging']))
worker.start()
while not self.params['events']['stop'].is_set():
if not self.check_workers():
break
time.sleep(self.configs.get('supervisor.wait'))
self.params['logging'].log('Waiting for workers', type(self))
self.params['events']['log_stop'].set()
self.join_workers()
worker.join()

View File

@ -1,25 +0,0 @@
from src.worker import Worker
class Validador(Worker):
def __init__(self, configs, params):
super(Validador, self).__init__(configs, params)
self.emails = params['queues']['emails']
self.validos = params['queues']['valid']
self.invalidos = params['queues']['invalid']
self.bosses = params['bosses']
self.frec = configs.get('supervisor.wait')
def run(self):
self.logger.log('Starting', type(self))
self.diary.put({'action': 'Inicio de jornada de Validador'})
while not self.stop.is_set():
em = self.emails.get(timeout=self.frec)
if not self.bosses.is_boss(em.sender):
self.invalidos.put(em)
continue
self.validos.put(em)
self.logger.log('Exiting', type(self))
self.diary.put({'action': 'Terminando la jornada de Validador'})

302
src/email/workers.py Normal file
View File

@ -0,0 +1,302 @@
import queue
from src.email.functions import connect, check_inbox
import time
from types import SimpleNamespace
from src.worker import Worker
from bs4 import BeautifulSoup
import re
import email.utils
from src.communication import Message
import json
from src.functions import dump_queue
from pprint import pprint
class Obtenedor(Worker):
"""
Trabajador que obtiene la lista de correos del inbox configurado
"""
def __init__(self, configs, params):
super(Obtenedor, self).__init__(configs, params)
self.name = 'Email:Obtenedor'
self.url = configs.get('email.imap.server')
self.port = configs.get('email.imap.port')
user = {'user': '', 'password': ''}
self.user = SimpleNamespace(**user)
self.user.name = configs.get('email.imap.user.name')
self.user.password = configs.get('email.imap.user.password')
self.ssl = configs.get('email.imap.ssl')
self.filename = params['filenames']['revisados']
self.revisados = []
self.load_revisados()
self.queue = params['queues']['emails']
self.frec = configs.get('supervisor.wait')
def is_revisado(self, uid):
return uid in self.revisados
def load_revisados(self):
data = []
try:
with open(self.filename, 'r') as f:
data = json.load(f)
except FileNotFoundError:
pass
self.revisados = data
def save_revisados(self):
data = []
try:
with open(self.filename, 'r') as f:
data = json.load(f)
except FileNotFoundError:
pass
for uid in self.revisados:
if uid not in data:
data.append(uid)
with open(self.filename, 'w') as f:
json.dump(data, f)
def add_revisado(self, uid):
if self.is_revisado(uid):
return
self.revisados.append(uid)
def build_message(self, email_part):
output = []
if email_part.is_multipart():
for part in email_part.get_payload():
output += self.build_message(part)
else:
html = email_part.get_payload(decode=True).decode('utf-8')
bs = BeautifulSoup(html, 'html.parser')
if bs.body:
html = bs.body.get_text()
else:
html = bs.get_text()
html = re.sub(' +', ' ', re.sub("\n+", ' ', html)).strip(' ')
output.append(html)
return output
def run(self) -> None:
self.start_turn()
while not self.stop.is_set():
e = 0
with connect(self.url, self.port, self.user.name, self.user.password, self.ssl) as imap:
self.logger.log('Getting emails', type(self))
emails = check_inbox(imap)
if emails is None:
self.logger.log('No emails found', type(self))
continue
for em in emails:
if self.is_revisado(em.uid):
continue
sender = em.message['from']
# text = ' '.join([em.message['subject'] + '.'] + self.build_message(em.message))
text = self.build_message(em.message)
msg = Message('email', text=text, original=em, sender=sender, subject=str(em.message['subject']),
datetime=email.utils.parsedate_to_datetime(em.message['Date']))
self.queue.put(msg)
self.add_revisado(em.uid)
e += 1
self.logger.log('{0} new emails found'.format(e), type(self))
self.diary.put({'message': 'Obtenidos {0} correos nuevos'.format(e)})
time.sleep(self.frec)
# self.save_revisados()
self.end_turn()
class Validador(Worker):
"""
Trabajador que valida segun las reglas establecidas
Reglas:
1. Listado de jefes, con sus correos en la libreta de contactos -> validos
2. Instrucciones conocidas -> invalidos, pero para revisar
3. Listado de spam -> borrar
"""
def __init__(self, configs, params):
super(Validador, self).__init__(configs=configs, params=params)
self.name = 'Email:Validador'
self.emails = params['queues']['emails']
self.validos = params['queues']['valid']
self.invalidos = params['queues']['invalid']
self.borrar = params['queues']['borrar']
self.bosses = params['bosses']
self.instrucciones = params['instrucciones']
self.frec = configs.get('supervisor.wait')
def validar_bosses(self, sender):
return self.bosses.is_boss(sender)
def validar_instrucciones(self, message):
return self.instrucciones.is_valid(message.subject)
def validar(self, message):
if self.validar_bosses(message.sender):
self.validos.put(message)
return
if self.validar_instrucciones(message):
self.invalidos.put(message)
return
self.borrar.put(message.original.uid)
def run(self):
self.start_turn()
while not self.stop.is_set():
try:
em = self.emails.get(timeout=self.frec)
except queue.Empty:
continue
self.validar(em)
# Cleanup
[self.validar(em) for em in dump_queue(self.emails, self.frec)]
self.end_turn()
class Consultador(Worker):
"""
Trabajador que registra los correos que no son de jefes para consulta
"""
def __init__(self, configs, params):
super(Consultador, self).__init__(configs=configs, params=params)
self.name = 'Email:Consultador'
self.filename = params['filenames']['consultas']
self.invalidos = params['queues']['invalid']
self.frec = configs.get('supervisor.wait')
self.max = configs.get('email.max')
self.mensajes = []
def is_full(self):
return len(self.mensajes) >= self.max
def save_messages(self):
data = []
try:
with open(self.filename, 'r') as f:
data = json.load(f)
except FileNotFoundError:
pass
for m in self.mensajes:
if m not in data:
data.append(m)
with open(self.filename, 'w') as f:
json.dump(data, f, indent=2)
self.mensajes = []
def parse_message(self, message):
msg = {
'sender': message.sender,
'uid': message.original.uid,
'time': message.datetime.strftime('%Y-%m-%d %H:%M:%S'),
'message': message.text
}
self.mensajes.append(msg)
def run(self) -> None:
self.start_turn()
while not self.stop.is_set():
try:
em = self.invalidos.get(timeout=self.frec)
except queue.Empty:
continue
self.parse_message(em)
if self.is_full():
self.save_messages()
[self.parse_message(message) for message in dump_queue(self.invalidos, self.frec)]
self.save_messages()
self.end_turn()
class Borrador(Worker):
"""
Trabajador que borra los correos marcados para borrar
"""
def __init__(self, configs, params):
super(Borrador, self).__init__(configs=configs, params=params)
self.name = 'Email:Borrador'
self.queue = params['queues']['borrar']
self.frec = configs.get('supervisor.wait')
self.max = configs.get('email.max')
self.url = configs.get('email.imap.server')
self.port = configs.get('email.imap.port')
user = {'user': '', 'password': ''}
self.user = SimpleNamespace(**user)
self.user.name = configs.get('email.imap.user.name')
self.user.password = configs.get('email.imap.user.password')
self.ssl = configs.get('email.imap.ssl')
self.borrar = []
def is_full(self):
return len(self.borrar) >= self.max
def do_borrar(self):
with connect(self.url, self.port, self.user.name, self.user.password, self.ssl) as imap:
status, msg = imap.select('INBOX')
if status != 'OK':
return
for uid in self.borrar:
print('Borrar ', uid)
# status, ids = imap.uid('store', uid, '+FLAGS', b'\\Deleted')
# if status != 'OK':
# continue
# imap.expunge()
self.borrar = []
def run(self) -> None:
self.start_turn()
while not self.stop.is_set():
try:
uid = self.queue.get(timeout=self.frec)
except queue.Empty:
continue
self.borrar.append(uid)
if self.is_full():
self.do_borrar()
# Cleanup
[self.borrar.append(uid) for uid in dump_queue(self.queue, self.frec)]
self.do_borrar()
self.end_turn()
class Procesador(Worker):
"""
Trabajador que revisa los correos validos y los procesa de acuerdo a las instrucciones
"""
def __init__(self, configs, params):
super(Procesador, self).__init__(configs=configs, params=params)
self.name = 'Email:Procesador'
self.queue = params['queues']['valid']
self.frec = configs.get('supervisor.wait')
self.brain = params['brain']
def cleanup(self):
self.brain.save_model()
def run(self) -> None:
self.start_turn()
while not self.stop.is_set():
try:
em = self.queue.get(timeout=self.frec)
except queue.Empty:
continue
command = self.brain.get_command(em.subject)
pprint((em.subject, command))
for t in em.text:
contenido = self.brain.get_response(command, t)
self.cleanup()
self.end_turn()

21
src/functions.py Normal file
View File

@ -0,0 +1,21 @@
import keyboard
import queue as q
def exit_thread(stop, logger):
logger.log('Starting exit thread', caller='exit_thread')
keyboard.wait('Esc')
logger.log('Escape pressed', caller='exit_thread')
stop.set()
logger.log('Exit signal sent', caller='exit_thread')
def dump_queue(queue, wait):
data = []
while True:
try:
m = queue.get(timeout=wait)
except q.Empty:
break
data.append(m)
return data

96
src/instrucciones.py Normal file
View File

@ -0,0 +1,96 @@
import os
import json
class Command:
def __init__(self):
self.command = ''
class Commands:
def __init__(self, data_folder):
self.filename = os.path.join(data_folder, 'commands.json')
self.commands = []
self.load_commands(self.filename)
def load_commands(self, filename):
data = []
if os.path.isfile(filename):
with open(filename, 'r') as f:
data = json.load(f)
for c in data:
cmd = Command()
cmd.command = c
self.commands.append(cmd)
def get(self, command):
for i, c in enumerate(self.commands):
if command == c.command:
return i
return None
def find(self, command):
return self.commands[self.get(command=command)]
class Instruccion:
def __init__(self):
self.instruccion = ''
self.command = None
self.params = {}
class Instrucciones:
def __init__(self, data_folder):
self.filename = os.path.join(data_folder, 'instrucciones.json')
self.commands = Commands(data_folder)
self.instrucciones = []
self.load_instrucciones(self.filename)
self.idx = 0
def load_instrucciones(self, filename):
data = []
if os.path.isfile(filename):
with open(filename, 'r') as f:
data = json.load(f)
for d in data:
i = Instruccion()
i.instruccion = d['name']
if 'params' in d:
for param, val in d['params'].items():
i.params[param] = val
if 'command' in d:
i.command = self.commands.find(d['command'])
self.instrucciones.append(i)
def get(self, instruccion):
if not self.is_valid(instruccion):
return None
for i, ins in enumerate(self.instrucciones):
if instruccion == ins.instruccion:
return i
def find(self, instruccion):
if not self.is_valid(instruccion):
return None
return self.instrucciones[self.get(instruccion)]
def is_valid(self, instruccion):
for i in self.instrucciones:
if instruccion == i.instruccion:
return True
return False
def add(self, instruccion, aliases: list = None):
if self.is_valid(instruccion):
return
ins = Instruccion()
ins.instruccion = instruccion
self.instrucciones.append(ins)
def save(self):
data = [{'instruccion': i.instruccion, 'params': i.params.items()} for i in self.instrucciones]
with open(self.filename, 'w') as f:
json.dump(data, f, indent=4)

View File

@ -7,3 +7,11 @@ class Worker(Thread):
self.stop = params['events']['stop'] self.stop = params['events']['stop']
self.diary = params['queues']['log'] self.diary = params['queues']['log']
self.logger = params['logging'] self.logger = params['logging']
def start_turn(self):
self.diary.put({'start_turn': self.name})
self.logger.log('Starting', type(self))
def end_turn(self):
self.diary.put({'end_turn': self.name})
self.logger.log('Exiting', type(self))