Usando spaCy para NLP que permite diferenciar entidades en el texto

This commit is contained in:
2020-11-17 18:44:32 -03:00
parent 2d157f6989
commit e2fd9b7daa
6 changed files with 85 additions and 10 deletions

View File

@ -10,6 +10,8 @@ keyboard = "*"
beautifulsoup4 = "*" beautifulsoup4 = "*"
pytz = "*" pytz = "*"
flask = "*" flask = "*"
nltk = "*"
spacy = "*"
[requires] [requires]
python_version = "3.9" python_version = "3.9"

3
src/brain/aiml.py Normal file
View File

@ -0,0 +1,3 @@
class Aiml:
    """Placeholder for AIML support; it currently holds no state or behavior."""

    def __init__(self):
        """Create an empty instance (nothing to initialise yet)."""
        pass

12
src/brain/brain.py Normal file
View File

@ -0,0 +1,12 @@
import os
import spacy
class Brain:
    """Wrapper around a spaCy pipeline used to analyse incoming phrases."""

    def __init__(self, data_folder):
        """Record the brain data file path and load the NLP model.

        Args:
            data_folder: Directory in which ``brain.json`` is expected.
        """
        # The path is only stored here; this class does not read the file.
        self.filename = os.path.join(data_folder, 'brain.json')
        # Loading requires the 'es_core_news_sm' model to be installed.
        self.nlp = spacy.load('es_core_news_sm')

    def get_command(self, phrase):
        """Run the NLP pipeline on *phrase* and return its result unchanged.

        Args:
            phrase: Text to analyse.

        Returns:
            Whatever ``self.nlp`` returns for *phrase*.
        """
        return self.nlp(phrase)

View File

@ -6,6 +6,7 @@ import pytz
from src.bosses import Bosses from src.bosses import Bosses
from src.instrucciones import Instrucciones from src.instrucciones import Instrucciones
from src.email.supervisor import Email from src.email.supervisor import Email
from src.brain.brain import Brain
def set_params(args, configs): def set_params(args, configs):
@ -18,6 +19,7 @@ def set_params(args, configs):
}, },
'bosses': Bosses(args.data_folder), 'bosses': Bosses(args.data_folder),
'instrucciones': Instrucciones(args.data_folder), 'instrucciones': Instrucciones(args.data_folder),
'brain': Brain(args.data_folder),
'logging': Logging(configs.get('timezone'), args.log_folder, log_name), 'logging': Logging(configs.get('timezone'), args.log_folder, log_name),
'logger': { 'logger': {
'name': log_name 'name': log_name

View File

@ -103,7 +103,7 @@ class Obtenedor(Worker):
self.logger.log('{0} new emails found'.format(e), type(self)) self.logger.log('{0} new emails found'.format(e), type(self))
self.diary.put({'message': 'Obtenidos {0} correos nuevos'.format(e)}) self.diary.put({'message': 'Obtenidos {0} correos nuevos'.format(e)})
time.sleep(self.frec) time.sleep(self.frec)
self.save_revisados() # self.save_revisados()
self.end_turn() self.end_turn()
@ -141,7 +141,7 @@ class Validador(Worker):
if self.validar_instrucciones(message): if self.validar_instrucciones(message):
self.invalidos.put(message) self.invalidos.put(message)
return return
self.borrar.put(message) self.borrar.put(message.original.uid)
def run(self): def run(self):
self.start_turn() self.start_turn()
@ -245,11 +245,12 @@ class Borrador(Worker):
return return
for uid in self.borrar: for uid in self.borrar:
status, ids = imap.uid('store', uid, '+FLAGS', b'\\Deleted') print(uid)
if status != 'OK': # status, ids = imap.uid('store', uid, '+FLAGS', b'\\Deleted')
continue # if status != 'OK':
# continue
imap.expunge() # imap.expunge()
self.borrar = [] self.borrar = []
def run(self) -> None: def run(self) -> None:
@ -276,9 +277,18 @@ class Procesador(Worker):
super(Procesador, self).__init__(configs=configs, params=params) super(Procesador, self).__init__(configs=configs, params=params)
self.name = 'Email:Procesador' self.name = 'Email:Procesador'
self.queue = params['queues']['valid']
self.frec = configs.get('supervisor.wait')
self.brain = params['brain']
def run(self) -> None: def run(self) -> None:
self.start_turn() self.start_turn()
while not self.stop.is_set(): while not self.stop.is_set():
try:
em = self.queue.get(timeout=self.frec)
except queue.Empty:
continue continue
print(em.text)
command = self.brain.get_command(em.text)
[print(ent) for ent in command.ents]
self.end_turn() self.end_turn()

View File

@ -2,10 +2,43 @@ import os
import json import json
class Command:
    """A single command entry, as held by the ``Commands`` collection."""

    def __init__(self):
        """Create a command with an empty value."""
        # Empty until the loader assigns the value read from the data file.
        self.command = ''
class Commands:
    """Collection of ``Command`` objects loaded from ``commands.json``."""

    def __init__(self, data_folder):
        """Load the commands stored in ``<data_folder>/commands.json``.

        A missing file is not an error: the collection simply starts empty.

        Args:
            data_folder: Directory expected to contain ``commands.json``.
        """
        self.filename = os.path.join(data_folder, 'commands.json')

        try:
            with open(self.filename, 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            data = []

        self.commands = []
        for c in data:
            cmd = Command()
            cmd.command = c
            self.commands.append(cmd)

    def get(self, command):
        """Return the index of the entry equal to *command*.

        Args:
            command: Value compared against each entry's ``command``.

        Returns:
            The position of the first match, or ``None`` if nothing matches.
        """
        for i, c in enumerate(self.commands):
            if command == c.command:
                return i
        return None

    def find(self, command):
        """Return the entry equal to *command*.

        Args:
            command: Value compared against each entry's ``command``.

        Returns:
            The first matching entry, or ``None`` if nothing matches.
        """
        index = self.get(command=command)
        # ``get`` reports "not found" as None; indexing the list with None
        # would raise TypeError, so surface the miss as None instead.
        if index is None:
            return None
        return self.commands[index]
class Instruccion:
    """One instruction: its name, aliases, linked command and parameters."""

    def __init__(self):
        """Create an instruction with empty/default fields."""
        self.instruccion = ''
        # Fresh containers per instance so instances never share state.
        self.aliases = []
        # None until a command is attached to this instruction.
        self.command = None
        self.params = {}
class Instrucciones: class Instrucciones:
@ -18,12 +51,20 @@ class Instrucciones:
except FileNotFoundError: except FileNotFoundError:
pass pass
self.commands = Commands(data_folder)
self.instrucciones = [] self.instrucciones = []
for d in data: for d in data:
i = Instruccion() i = Instruccion()
i.instruccion = d['instruccion'] i.instruccion = d['name']
if 'aliases' in d:
for a in d['aliases']: for a in d['aliases']:
i.aliases.append(a) i.aliases.append(a)
if 'params' in d:
for param, val in d['params'].items():
i.params[param] = val
if 'command' in d:
i.command = self.commands.find(d['command'])
self.instrucciones.append(i) self.instrucciones.append(i)
def get(self, instruccion): def get(self, instruccion):
@ -35,6 +76,11 @@ class Instrucciones:
if instruccion in ins.aliases: if instruccion in ins.aliases:
return i return i
def find(self, instruccion):
if not self.is_valid(instruccion):
return None
return self.instrucciones[self.get(instruccion)]
def is_valid(self, instruccion): def is_valid(self, instruccion):
for i in self.instrucciones: for i in self.instrucciones:
if instruccion == i.instruccion: if instruccion == i.instruccion: