Files
contabilidad/python/src/ai/network.py
2021-12-22 21:53:30 -03:00

127 lines
3.6 KiB
Python

import json
import os
import time
import timeit
import tensorflow as tf
import sklearn
import numpy as np
from sklearn.preprocessing import LabelEncoder
import src.contabilidad.pdf as pdf
import src.contabilidad.text_handler as th
class Layer:
def __init__(self):
self.__weights = None
self.__bias = None
def set_size(self, inputs: int, size: int):
self.__weights = [[0 for j in range(0, inputs)] for i in range(0, size)]
self.__bias = [0 for i in range(0, size)]
def add_weight(self, vector: list, idx: int = None):
if idx is None:
self.__weights.append(vector)
return self
self.__weights = self.__weights[:idx] + [vector] + self.__weights[idx:]
return self
def set_weight(self, value: float, weight_index: int, input_index: int):
self.__weights[weight_index][input_index] = value
def set_bias(self, value: list):
self.__bias = value
def train(self, input_values: list, output_values: list):
output = self.get_output(input_values)
errors = []
for i, v in enumerate(output):
error = (output_values[i] - v) / output_values[i]
new_value = v * error
def to_json(self):
return {
'bias': self.__bias,
'weights': self.__weights
}
def get_output(self, vector: list):
output = []
for i, weight in enumerate(self.__weights):
val = 0
for j, v in enumerate(weight):
val += v * vector[j]
output[i] = val + self.__bias[i]
return output
def layer_factory(layer_dict: dict):
layer = Layer()
layer.set_bias(layer_dict['bias'])
[layer.add_weight(w) for w in layer_dict['weights']]
return layer
class Network:
def __init__(self, filename: str):
self._filename = filename
self.__layers = None
def load(self):
with open(self._filename) as f:
data = json.load(f)
if 'layers' in data.keys():
self.add_layers(data['layers'])
def add_layers(self, layers: list):
for lr in layers:
layer = layer_factory(lr)
self.__layers.append(layer)
class AI:
def __init__(self, dictionary_filename, logger):
self.__dict = None
self.__network = None
self.__sources = None
self._phrases = None
self.filename = ''
def add_source(self, text):
if self.__sources is None:
self.__sources = []
self.__sources.append(text)
return self
def set_filename(self, filename: str):
self.filename = filename
return self
def process_sources(self):
for source in self.__sources:
self.process(**source)
def process(self, filename, password):
encoder = LabelEncoder()
t = filename.split('.')
temp = os.path.realpath(os.path.join(os.path.dirname(filename), t[0] + '-temp.pdf'))
pdf.remove_encryption(filename, password, temp)
obj = pdf.get_text(temp)
os.remove(temp)
word_list = th.split_words(obj)
fits = encoder.fit_transform(word_list)
phrases = []
for length in range(1, len(word_list) + 1):
for start in range(0, len(word_list)):
phrase = word_list[start:(start + length)]
phrase = np.append(np.array([fits[word_list.index(w)] for w in phrase]),
np.zeros([len(word_list) - len(phrase)]))
phrases.append(phrase)
phrases = np.array(phrases)
self._phrases = phrases
def active_train(self):
pass