127 lines
3.6 KiB
Python
127 lines
3.6 KiB
Python
import json
|
|
import os
|
|
import time
|
|
import timeit
|
|
|
|
import tensorflow as tf
|
|
import sklearn
|
|
import numpy as np
|
|
from sklearn.preprocessing import LabelEncoder
|
|
|
|
import src.contabilidad.pdf as pdf
|
|
import src.contabilidad.text_handler as th
|
|
|
|
|
|
class Layer:
    """A single layer stored as plain Python lists.

    The weight list holds one weight vector per output value (one entry per
    input); the bias list holds one value per output value.
    """

    def __init__(self):
        # Both stay None until set_size(), add_weight() or set_bias() fills them.
        self.__weights = None
        self.__bias = None

    def set_size(self, inputs: int, size: int):
        """Reset the layer to ``size`` zero weight vectors of ``inputs`` entries.

        The bias list is reset to ``size`` zeros as well.
        """
        self.__weights = [[0] * inputs for _ in range(size)]
        self.__bias = [0] * size

    def add_weight(self, vector: list, idx: int = None):
        """Add a weight vector, at position ``idx`` or at the end when omitted.

        Returns:
            The layer itself, so calls can be chained.
        """
        if self.__weights is None:
            # A fresh layer has no list yet; create it instead of failing on None.
            self.__weights = []
        if idx is None:
            self.__weights.append(vector)
        else:
            self.__weights.insert(idx, vector)
        return self

    def set_weight(self, value: float, weight_index: int, input_index: int):
        """Overwrite one entry of one weight vector."""
        self.__weights[weight_index][input_index] = value

    def set_bias(self, value: list):
        """Replace the whole bias list."""
        self.__bias = value

    def train(self, input_values: list, output_values: list):
        """Compare the layer output for ``input_values`` with the targets.

        No weight or bias is modified: the update step is not implemented.

        Returns:
            One relative error ``(target - output) / target`` per output value.

        Raises:
            ZeroDivisionError: If a target value is 0.
            IndexError: If ``output_values`` is shorter than the layer output.
        """
        output = self.get_output(input_values)
        return [
            (output_values[i] - value) / output_values[i]
            for i, value in enumerate(output)
        ]

    def to_json(self):
        """Return the bias and weight lists in a JSON-serialisable dict."""
        return {
            'bias': self.__bias,
            'weights': self.__weights
        }

    def get_output(self, vector: list):
        """Return, per weight vector, its dot product with ``vector`` plus bias.

        A layer without any weight vector yields an empty list.

        Raises:
            IndexError: If ``vector`` is shorter than a weight vector, or the
                bias list is shorter than the weight list.
        """
        output = []
        for i, weight in enumerate(self.__weights or []):
            weighted_sum = sum(w * vector[j] for j, w in enumerate(weight))
            # Append: the result list starts empty, so indexing into it fails.
            output.append(weighted_sum + self.__bias[i])
        return output
|
|
|
|
|
|
def layer_factory(layer_dict: dict):
    """Build a Layer from a dict with ``'bias'`` and ``'weights'`` entries.

    Args:
        layer_dict: Mapping with a ``'bias'`` list and a ``'weights'`` list of
            weight vectors (the same keys ``Layer.to_json`` emits).

    Returns:
        A new Layer holding that bias and those weight vectors, in order.

    Raises:
        KeyError: If ``'bias'`` or ``'weights'`` is missing.
    """
    layer = Layer()
    # A new Layer starts with its weight list unset; sizing it to zero gives
    # add_weight() an empty list to append to.
    layer.set_size(0, 0)
    layer.set_bias(layer_dict['bias'])
    for weight_vector in layer_dict['weights']:
        layer.add_weight(weight_vector)
    return layer
|
|
|
|
|
|
class Network:
    """An ordered collection of layers that can be loaded from a JSON file."""

    def __init__(self, filename: str):
        """Remember the JSON file path; nothing is read until load()."""
        self._filename = filename
        # Start with an empty list so add_layers() can append right away.
        self.__layers = []

    def load(self):
        """Read the JSON file and add the layers listed under ``'layers'``.

        A file without a ``'layers'`` entry leaves the network unchanged.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        with open(self._filename, encoding='utf-8') as f:
            data = json.load(f)
        if 'layers' in data:
            self.add_layers(data['layers'])

    def add_layers(self, layers: list):
        """Build a layer from each dict via layer_factory and append it."""
        for layer_dict in layers:
            self.__layers.append(layer_factory(layer_dict))
|
|
|
|
|
|
class AI:
    """Collects encrypted PDF sources and turns their text into phrase vectors."""

    def __init__(self, dictionary_filename, logger):
        # NOTE(review): dictionary_filename and logger are accepted but neither
        # stored nor used here - confirm whether they should be kept.
        self.__dict = None
        self.__network = None
        self.__sources = None
        self._phrases = None
        self.filename = ''

    def add_source(self, text):
        """Queue a source for process_sources().

        Each source is later unpacked as keyword arguments of process(), so it
        needs ``filename`` and ``password`` keys.

        Returns:
            The instance itself, so calls can be chained.
        """
        if self.__sources is None:
            self.__sources = []
        self.__sources.append(text)
        return self

    def set_filename(self, filename: str):
        """Store ``filename`` on the instance and return the instance."""
        self.filename = filename
        return self

    def process_sources(self):
        """Run process() on every queued source; do nothing when none is queued."""
        for source in self.__sources or []:
            self.process(**source)

    def process(self, filename, password):
        """Decrypt one PDF, extract its words and store the phrase matrix.

        The decrypted copy is written next to the original as
        ``<name>-temp.pdf`` and removed again once the text has been read.
        The result is stored in ``self._phrases``.

        Args:
            filename: Path of the encrypted PDF.
            password: Password handed to ``pdf.remove_encryption``.
        """
        encoder = LabelEncoder()
        temp = self._temp_pdf_path(filename)
        pdf.remove_encryption(filename, password, temp)
        try:
            obj = pdf.get_text(temp)
        finally:
            # Never leave the decrypted copy behind, even if extraction fails.
            os.remove(temp)
        word_list = th.split_words(obj)
        fits = encoder.fit_transform(word_list)
        self._phrases = self._encode_phrases(word_list, fits)

    def active_train(self):
        """Placeholder; not implemented."""
        pass

    @staticmethod
    def _temp_pdf_path(filename):
        """Return the absolute path of the temporary decrypted copy of a PDF."""
        # Split directory and base name first: only the base name loses its
        # extension, so dots or folders earlier in the path are left alone.
        directory, base = os.path.split(filename)
        stem = os.path.splitext(base)[0]
        return os.path.realpath(os.path.join(directory, stem + '-temp.pdf'))

    @staticmethod
    def _encode_phrases(word_list, fits):
        """Encode every (length, start) window of ``word_list`` as a padded row.

        Each row holds the codes of the words in the window followed by zeros
        up to ``len(word_list)`` entries. ``fits[i]`` is the code of
        ``word_list[i]``.

        Returns:
            A float array with ``len(word_list) ** 2`` rows, or an empty 1-D
            array when ``word_list`` is empty.
        """
        total = len(word_list)
        # One lookup table instead of a linear word_list.index() per word.
        # Assumes the words are hashable (e.g. strings) - TODO confirm.
        code_of = {}
        for word, code in zip(word_list, fits):
            code_of.setdefault(word, code)
        rows = []
        for length in range(1, total + 1):
            # NOTE(review): windows starting near the end are cut short by the
            # slice, so the same short phrases repeat - confirm this is intended.
            for start in range(total):
                codes = [code_of[w] for w in word_list[start:start + length]]
                row = np.zeros(total)
                row[:len(codes)] = codes
                rows.append(row)
        return np.array(rows)
|