Usando spaCy para NLP que permite diferenciar entidades en el texto

This commit is contained in:
2020-11-17 18:44:32 -03:00
parent 2d157f6989
commit e2fd9b7daa
6 changed files with 85 additions and 10 deletions

View File

@ -10,6 +10,8 @@ keyboard = "*"
beautifulsoup4 = "*"
pytz = "*"
flask = "*"
nltk = "*"
spacy = "*"
[requires]
python_version = "3.9"

3
src/brain/aiml.py Normal file
View File

@ -0,0 +1,3 @@
class Aiml:
    """Placeholder class; its constructor currently sets no attributes."""

    def __init__(self) -> None:
        """Create an instance without storing any state."""
        return None

12
src/brain/brain.py Normal file
View File

@ -0,0 +1,12 @@
import os
import spacy
class Brain:
    """Thin wrapper around a spaCy pipeline used to analyse phrases."""

    def __init__(self, data_folder, model='es_core_news_sm'):
        """Record the brain data path and load the spaCy pipeline.

        Args:
            data_folder: Directory that is joined with ``brain.json`` to
                build ``self.filename``. The file is not opened here.
            model: Name or path handed to ``spacy.load``. Defaults to the
                Spanish ``es_core_news_sm`` pipeline, which used to be
                hard-coded, so existing callers behave the same.

        Raises:
            Whatever ``spacy.load`` raises when the pipeline cannot be
            loaded (spaCy documents ``OSError`` for a missing model).
        """
        self.filename = os.path.join(data_folder, 'brain.json')
        self.nlp = spacy.load(model)

    def get_command(self, phrase):
        """Run *phrase* through the loaded pipeline and return its result.

        The returned object is whatever ``self.nlp`` produces; callers in
        this project read its ``ents`` attribute.
        """
        return self.nlp(phrase)

View File

@ -6,6 +6,7 @@ import pytz
from src.bosses import Bosses
from src.instrucciones import Instrucciones
from src.email.supervisor import Email
from src.brain.brain import Brain
def set_params(args, configs):
@ -18,6 +19,7 @@ def set_params(args, configs):
},
'bosses': Bosses(args.data_folder),
'instrucciones': Instrucciones(args.data_folder),
'brain': Brain(args.data_folder),
'logging': Logging(configs.get('timezone'), args.log_folder, log_name),
'logger': {
'name': log_name

View File

@ -103,7 +103,7 @@ class Obtenedor(Worker):
self.logger.log('{0} new emails found'.format(e), type(self))
self.diary.put({'message': 'Obtenidos {0} correos nuevos'.format(e)})
time.sleep(self.frec)
self.save_revisados()
# self.save_revisados()
self.end_turn()
@ -141,7 +141,7 @@ class Validador(Worker):
if self.validar_instrucciones(message):
self.invalidos.put(message)
return
self.borrar.put(message)
self.borrar.put(message.original.uid)
def run(self):
self.start_turn()
@ -245,11 +245,12 @@ class Borrador(Worker):
return
for uid in self.borrar:
status, ids = imap.uid('store', uid, '+FLAGS', b'\\Deleted')
if status != 'OK':
continue
print(uid)
# status, ids = imap.uid('store', uid, '+FLAGS', b'\\Deleted')
# if status != 'OK':
# continue
imap.expunge()
# imap.expunge()
self.borrar = []
def run(self) -> None:
@ -276,9 +277,18 @@ class Procesador(Worker):
super(Procesador, self).__init__(configs=configs, params=params)
self.name = 'Email:Procesador'
self.queue = params['queues']['valid']
self.frec = configs.get('supervisor.wait')
self.brain = params['brain']
def run(self) -> None:
    """Process queued emails until the stop flag is set.

    Each email taken from ``self.queue`` has its ``text`` printed, is
    passed to ``self.brain.get_command`` and every entity in the result's
    ``ents`` is printed. ``start_turn`` and ``end_turn`` bracket the loop.
    """
    self.start_turn()
    while not self.stop.is_set():
        try:
            em = self.queue.get(timeout=self.frec)
        except queue.Empty:
            # Nothing arrived within the wait window; re-check the stop flag.
            continue
        print(em.text)
        command = self.brain.get_command(em.text)
        # Plain loop: printing is a side effect, so no throwaway list is built.
        for ent in command.ents:
            print(ent)
    self.end_turn()

View File

@ -2,10 +2,43 @@ import os
import json
class Command:
    """Holds a single command value in its ``command`` attribute."""

    def __init__(self) -> None:
        """Start with an empty command string."""
        self.command = str()
class Commands:
    """Collection of ``Command`` objects loaded from ``commands.json``."""

    def __init__(self, data_folder):
        """Load the commands stored in ``<data_folder>/commands.json``.

        A missing file yields an empty collection. Every element of the
        loaded JSON document becomes the ``command`` attribute of a new
        ``Command`` appended to ``self.commands``.

        Args:
            data_folder: Directory expected to contain ``commands.json``.
        """
        self.filename = os.path.join(data_folder, 'commands.json')
        try:
            with open(self.filename, 'r') as f:
                data = json.load(f)
        except FileNotFoundError:
            # No commands file yet: start with nothing registered.
            data = []

        self.commands = []
        for entry in data:
            cmd = Command()
            cmd.command = entry
            self.commands.append(cmd)

    def get(self, command):
        """Return the index of the first entry equal to *command*, or None."""
        for i, c in enumerate(self.commands):
            if command == c.command:
                return i
        return None

    def find(self, command):
        """Return the stored entry matching *command*, or None if absent.

        Previously an unknown command made this method index the list with
        ``None`` and raise ``TypeError``; it now reports the miss the same
        way ``get`` does.
        """
        index = self.get(command=command)
        if index is None:
            return None
        return self.commands[index]
class Instruccion:
    """Plain record describing one instruction.

    All fields start empty; the code that creates the record fills them in.
    """

    def __init__(self) -> None:
        """Initialise every field to its empty value."""
        self.instruccion = str()  # instruction name
        self.aliases = list()     # alternative names
        self.command = None       # associated command, if any
        self.params = dict()      # parameter name -> value
class Instrucciones:
@ -18,12 +51,20 @@ class Instrucciones:
except FileNotFoundError:
pass
self.commands = Commands(data_folder)
self.instrucciones = []
for d in data:
i = Instruccion()
i.instruccion = d['instruccion']
i.instruccion = d['name']
if 'aliases' in d:
for a in d['aliases']:
i.aliases.append(a)
if 'params' in d:
for param, val in d['params'].items():
i.params[param] = val
if 'command' in d:
i.command = self.commands.find(d['command'])
self.instrucciones.append(i)
def get(self, instruccion):
@ -35,6 +76,11 @@ class Instrucciones:
if instruccion in ins.aliases:
return i
def find(self, instruccion):
    """Return the stored instruction for *instruccion*, or None.

    The lookup only happens when ``is_valid`` accepts the value; the
    position in ``self.instrucciones`` comes from ``get``.
    """
    if self.is_valid(instruccion):
        position = self.get(instruccion)
        return self.instrucciones[position]
    return None
def is_valid(self, instruccion):
for i in self.instrucciones:
if instruccion == i.instruccion: