"""Hand-rolled layer/network containers and a PDF word-phrase encoder."""
import json
import os
import time
import timeit
from typing import Optional

import tensorflow as tf
import sklearn
import numpy as np
from sklearn.preprocessing import LabelEncoder

import src.contabilidad.pdf as pdf
import src.contabilidad.text_handler as th


class Layer:
    """A fully connected layer stored as plain Python lists.

    The weights are a list of rows, one row per output unit, each row
    holding one coefficient per input. The bias is a list with one value
    per output unit.
    """

    def __init__(self):
        # Both stay None until set_size/add_weight/set_bias populate them.
        self.__weights = None
        self.__bias = None

    def set_size(self, inputs: int, size: int):
        """Reset the layer to ``size`` units with ``inputs`` zero weights each.

        The bias is reset to ``size`` zeros as well.
        """
        self.__weights = [[0] * inputs for _ in range(size)]
        self.__bias = [0] * size

    def add_weight(self, vector: list, idx: Optional[int] = None):
        """Add one unit's weight row and return ``self`` for chaining.

        Args:
            vector: The weight row to add.
            idx: Position to insert the row before; appended when ``None``.
        """
        # A fresh Layer has no weight list yet; create it on first use so
        # that layer_factory() can populate a new instance.
        if self.__weights is None:
            self.__weights = []
        if idx is None:
            self.__weights.append(vector)
        else:
            self.__weights.insert(idx, vector)
        return self

    def set_weight(self, value: float, weight_index: int, input_index: int):
        """Overwrite the coefficient at row ``weight_index``, column ``input_index``."""
        self.__weights[weight_index][input_index] = value

    def set_bias(self, value: list):
        """Replace the whole bias list."""
        self.__bias = value

    def train(self, input_values: list, output_values: list):
        """Compute a relative error per output for one sample.

        NOTE(review): this method is unfinished. The values computed below
        are neither stored nor applied to the weights, and nothing is
        returned.

        Raises:
            ZeroDivisionError: If an entry of ``output_values`` is 0.
            IndexError: If ``output_values`` is shorter than the layer output.
        """
        output = self.get_output(input_values)
        for i, v in enumerate(output):
            error = (output_values[i] - v) / output_values[i]
            new_value = v * error  # TODO: not applied anywhere yet

    def to_json(self):
        """Return the layer as a dict with ``'bias'`` and ``'weights'`` keys.

        The returned dict references the layer's own lists (no copy).
        """
        return {
            'bias': self.__bias,
            'weights': self.__weights
        }

    def get_output(self, vector: list):
        """Return ``weights . vector + bias`` as a list, one value per unit.

        Raises:
            IndexError: If ``vector`` is shorter than a weight row.
            TypeError: If the weights or the bias have not been set yet.
        """
        return [
            sum(w * vector[j] for j, w in enumerate(row)) + self.__bias[i]
            for i, row in enumerate(self.__weights)
        ]


def layer_factory(layer_dict: dict):
    """Build a ``Layer`` from a dict with ``'bias'`` and ``'weights'`` keys.

    This is the same shape that ``Layer.to_json`` produces.
    """
    layer = Layer()
    layer.set_bias(layer_dict['bias'])
    for row in layer_dict['weights']:
        layer.add_weight(row)
    return layer


class Network:
    """A list of ``Layer`` objects loaded from a JSON file."""

    def __init__(self, filename: str):
        self._filename = filename
        # Created lazily by add_layers().
        self.__layers = None

    def load(self):
        """Read the JSON file and add every entry found under ``'layers'``.

        A file without a ``'layers'`` key leaves the network unchanged.
        """
        with open(self._filename, encoding='utf-8') as f:
            data = json.load(f)
        if 'layers' in data:
            self.add_layers(data['layers'])

    def add_layers(self, layers: list):
        """Append one ``Layer`` per dict in ``layers`` (see ``layer_factory``)."""
        if self.__layers is None:
            self.__layers = []
        for layer_dict in layers:
            self.__layers.append(layer_factory(layer_dict))


class AI:
    """Collects PDF sources and encodes their words as padded phrase rows."""

    def __init__(self, dictionary_filename, logger):
        # NOTE(review): dictionary_filename and logger are accepted but not
        # stored or used anywhere in this class.
        self.__dict = None
        self.__network = None
        self.__sources = None
        self._phrases = None
        self.filename = ''

    def add_source(self, text):
        """Queue a source for ``process_sources`` and return ``self``.

        ``process_sources`` unpacks each source as keyword arguments to
        ``process``, so a source is expected to be a mapping with
        ``filename`` and ``password`` keys.
        """
        if self.__sources is None:
            self.__sources = []
        self.__sources.append(text)
        return self

    def set_filename(self, filename: str):
        """Store ``filename`` on the instance and return ``self``."""
        self.filename = filename
        return self

    def process_sources(self):
        """Run ``process`` on every queued source; no-op when none were added.

        Each call to ``process`` overwrites ``self._phrases``, so only the
        last source's result is kept.
        """
        for source in self.__sources or []:
            self.process(**source)

    def process(self, filename, password):
        """Encode the words of one PDF into ``self._phrases``.

        A temporary ``<stem>-temp.pdf`` is written next to ``filename`` by
        ``pdf.remove_encryption``, read with ``pdf.get_text`` and always
        deleted afterwards. The extracted words are label-encoded, and for
        every length ``1..n`` and every start ``0..n-1`` one row is built
        from the codes of ``words[start:start + length]``, zero-padded on
        the right to ``n`` entries. The result is an ``n * n`` by ``n``
        array, so memory grows cubically with the number of words.

        NOTE(review): padding uses 0, which presumably collides with the
        first label code produced by ``LabelEncoder`` - confirm this is
        intended.
        """
        encoder = LabelEncoder()

        # Derive the temp name from the base name only. Splitting the full
        # path on '.' breaks on dotted directories and duplicates the
        # directory part for relative paths.
        stem = os.path.splitext(os.path.basename(filename))[0]
        temp = os.path.realpath(
            os.path.join(os.path.dirname(filename), stem + '-temp.pdf')
        )

        pdf.remove_encryption(filename, password, temp)
        try:
            obj = pdf.get_text(temp)
        finally:
            # Never leave the temporary copy behind, even on failure.
            os.remove(temp)

        word_list = th.split_words(obj)
        fits = encoder.fit_transform(word_list)

        total = len(word_list)
        phrases = []
        for length in range(1, total + 1):
            for start in range(total):
                # Equal words share one code, so slicing the encoded array
                # yields the same values as looking each word up again.
                codes = fits[start:start + length]
                padding = np.zeros([total - len(codes)])
                phrases.append(np.append(codes, padding))
        self._phrases = np.array(phrases)

    def active_train(self):
        """Placeholder; not implemented."""
        pass