Top

tensorbuilder.tensordata module

import functools
import random
from itertools import islice, izip_longest

import numpy as np
import tensorflow as tf

import asq
import asq.queryables
from asq.initiators import query
from decorator import decorator

"""
"""

def immutable(method):
    """
    Decorator. Passes a copy of the entity to the method so that the
    original object remains untouched. Used in methods to get a fluent
    immutable API.
    """
    @functools.wraps(method)
    def wrapper(self, *args, **kwargs):
        # The method operates on a copy; the receiver is never mutated.
        return method(self.copy(), *args, **kwargs)
    return wrapper

class Data(object):
    """Fluent, immutable-style container of named data sources.

    Each keyword argument becomes both an entry in ``self.sources`` and an
    attribute of the instance (``Data(x=..., y=...)`` exposes ``self.x``).
    Iteration behaviour (raw data, mini-batches, epochs) is composed via
    chained calls, each of which operates on a copy of the object.
    """

    def __init__(self, _iterator=None, **sources):
        """
        :param _iterator: zero-argument callable returning an iterator of
            ``Data`` objects; defaults to yielding ``self`` once.
        :param sources: named array-like data sources.
            NOTE(review): assumed to be numpy arrays of equal length,
            indexable by integer arrays — confirm with callers.
        """
        super(Data, self).__init__()
        self.sources = sources
        # Expose each source as a direct attribute for convenience.
        self.__dict__.update(sources)

        self._iterator = _iterator if _iterator else lambda: self._raw_data()
        self.batch = None
        self.patch = None

    def copy(self):
        """Return a shallow copy sharing the same sources and iterator."""
        return Data(_iterator=self._iterator, **self.sources)

    def __iter__(self):
        return self._iterator()

    def enumerated(self):
        """Return ``enumerate`` over the configured iterator."""
        return enumerate(self._iterator())

    def split(self, *splits):
        """Randomly partition the sources into ``len(splits)`` Data objects.

        ``splits`` are relative weights, e.g. ``data.split(8, 2)`` yields an
        80/20 split. Row indexes are shuffled before partitioning, so each
        part is a random sample without replacement.
        """
        data_length = len(self.x)

        # list(...) so shuffling also works on Python 3, where a range
        # object cannot be shuffled in place.
        indexes = list(range(data_length))
        random.shuffle(indexes)

        splits = [0] + list(splits)
        splits_total = sum(splits)

        return (
            query(splits)
            .scan()
            .select(lambda n: int(data_length * n / splits_total))
            .then(_window, n=2)
            # bounds is a (start, end) pair produced by the sliding window;
            # explicit indexing replaces the Python-2-only tuple-parameter
            # lambda, which is a SyntaxError on Python 3.
            .select(lambda bounds: np.array(indexes[bounds[0]:bounds[1]]))
            .select(lambda split: Data(**{k: source[split, :] for (k, source) in self.sources.items()}))
            .to_list()
        )

    @immutable
    def raw_data(self):
        """Reset the iterator to yield the whole dataset once (fluent)."""
        self._iterator = lambda: self._raw_data()
        return self

    def _raw_data(self):
        # Default iterator: the dataset itself, exactly once.
        yield self

    @immutable
    def batches_of(self, batch_size):
        """Wrap the current iterator to emit random batches of ``batch_size``.

        Rows are sampled with replacement (``np.random.choice``).
        """
        _iterator = self._iterator
        self._iterator = lambda: self._batch(batch_size, _iterator)
        return self

    def _batch(self, batch_size, _iterator):
        for data in _iterator():
            length = len(data.x)
            sample = np.random.choice(length, batch_size)
            new_data = Data(**{k: source[sample] for (k, source) in data.sources.items()})

            yield new_data

    @immutable
    def epochs(self, epochs):
        """Repeat the current iterator ``epochs`` times (fluent)."""
        _iterator = self._iterator
        self._iterator = lambda: self._epochs(epochs, _iterator)
        return self

    def _epochs(self, epochs, _iterator):
        for epoch in range(epochs):
            for data in _iterator():
                # Tag each yielded Data with the epoch it belongs to.
                data.epoch = epoch
                yield data

    def placeholders(self, *args):
        """Return a float32 ``tf.placeholder`` per named source in ``args``.

        The first (batch) dimension is left as ``None``; remaining
        dimensions are taken from the source's shape.
        """
        return list(self._placeholders(*args))

    def _placeholders(self, *args):
        for source_name in args:
            source = self.sources[source_name]
            shape = [None] + list(source.shape)[1:]
            yield tf.placeholder(tf.float32, shape=shape)

    def run(self, sess, tensor, tensors=None, **feed):
        """Run ``tensor`` in ``sess``, feeding sources by keyword.

        Keyword arguments map source names to placeholders, e.g.
        ``data.run(sess, loss, x=x_ph)`` feeds ``data.sources['x']`` into
        ``x_ph``. ``tensors`` is an extra ``{placeholder: value}`` mapping
        merged into the feed (``None``, not ``{}``, as default — avoids the
        shared mutable default argument).
        """
        # Best-effort unwrap of tensorbuilder-style wrappers; objects
        # without a .tensor() method are assumed to already be tensors.
        # AttributeError only — a bare except would hide real failures
        # inside .tensor().
        try:
            tensor = tensor.tensor()
        except AttributeError:
            pass

        feed = {feed[k]: self.sources[k] for k in feed}
        if tensors:
            feed.update(tensors)

        return sess.run(tensor, feed_dict=feed)


def _window(seq, n=2):
    "Returns a sliding window (of width n) over data from the iterable"
    "   s -> (s0,s1,...s[n-1]), (s1,s2,...,sn), ...                   "
    it = iter(seq)
    result = tuple(islice(it, n))
    if len(result) == n:
        yield result
    for elem in it:
        result = result[1:] + (elem,)
        yield result

def _then(q, fn, *args, **kwargs):
    """Feed the queryable through ``fn`` and re-wrap the result as a query."""
    transformed = fn(q, *args, **kwargs)
    return query(transformed)

# Monkey-patch: give every asq Queryable a .then() combinator so arbitrary
# transformations (e.g. _window above) can be chained inside a query.
asq.queryables.Queryable.then = _then

Functions

def immutable(

func)

Decorator. Passes a copy of the entity to the method so that the original object remains untouched. Used in methods to get a fluent immutable API.

@decorator
def immutable(method, self, *args, **kwargs):
    """
    Decorator. Passes a copy of the entity to the method so that the original object remains untouched.
    Used in methods to get a fluent immutable API.
    """
    return method(self.copy(), *args, **kwargs)

Classes

class Data

docstring for Data

class Data(object):
    """docstring for Data"""
    def __init__(self, _iterator=None, **sources):
        super(Data, self).__init__()
        self.sources = sources
        self.__dict__.update(sources)

        self._iterator = _iterator if _iterator else lambda: self._raw_data()
        self.batch = None
        self.patch = None


    def copy(self):
        return Data(_iterator=self._iterator, **self.sources)

    def __iter__(self):
        return self._iterator()

    def enumerated(self):
        return enumerate(self._iterator())


    def split(self, *splits):
        """docstring for Batcher"""

        data_length = len(self.x)

        indexes = range(data_length)
        random.shuffle(indexes)

        splits = [0] + list(splits)
        splits_total = sum(splits)

        return (
            query(splits)
            .scan()
            .select(lambda n: int(data_length * n / splits_total))
            .then(_window, n=2)
            .select(lambda (start, end): np.array(indexes[start:end]))
            .select(lambda split: Data(**{k: source[split,:] for (k, source) in self.sources.iteritems()}))
            .to_list()
        )


    @immutable
    def raw_data(self):
        self._iterator = lambda: self._raw_data()
        return self

    def _raw_data(self):
        yield self


    @immutable
    def batches_of(self, batch_size):
        """
        docstring for Batcher
        """
        _iterator = self._iterator
        self._iterator = lambda: self._batch(batch_size, _iterator)
        return self

    def _batch(self, batch_size, _iterator):
        for data in _iterator():
            length = len(data.x)
            sample = np.random.choice(length, batch_size)
            new_data = Data(**{k: source[sample] for (k, source) in data.sources.iteritems()})

            yield new_data

    @immutable
    def epochs(self, epochs):
        """docstring for Batcher"""
        _iterator = self._iterator
        self._iterator = lambda: self._epochs(epochs, _iterator)
        return self

    def _epochs(self, epochs, _iterator):
        for epoch in range(epochs):
            for data in _iterator():
                data.epoch = epoch
                yield data


    def placeholders(self, *args):
        return list(self._placeholders(*args))

    def _placeholders(self, *args):
         for source_name in args:
             source = self.sources[source_name]
             shape = [None] + list(source.shape)[1:]
             yield tf.placeholder(tf.float32, shape=shape)


    def run(self, sess, tensor, tensors={}, **feed):

        try:
            tensor = tensor.tensor()
        except:
            pass

        feed = { feed[k]: self.sources[k] for k in feed }
        feed.update(tensors)

        return sess.run(tensor, feed_dict=feed)

Ancestors (in MRO)

  • Data
  • __builtin__.object

Instance variables

var batch

var patch

var sources

Methods

def __init__(

self, _iterator=None, **sources)

def __init__(self, _iterator=None, **sources):
    super(Data, self).__init__()
    self.sources = sources
    self.__dict__.update(sources)
    self._iterator = _iterator if _iterator else lambda: self._raw_data()
    self.batch = None
    self.patch = None

def batches_of(

self, batch_size)

docstring for Batcher

@immutable
def batches_of(self, batch_size):
    """
    docstring for Batcher
    """
    _iterator = self._iterator
    self._iterator = lambda: self._batch(batch_size, _iterator)
    return self

def copy(

self)

def copy(self):
    return Data(_iterator=self._iterator, **self.sources)

def enumerated(

self)

def enumerated(self):
    return enumerate(self._iterator())

def epochs(

self, epochs)

docstring for Batcher

@immutable
def epochs(self, epochs):
    """docstring for Batcher"""
    _iterator = self._iterator
    self._iterator = lambda: self._epochs(epochs, _iterator)
    return self

def placeholders(

self, *args)

def placeholders(self, *args):
    return list(self._placeholders(*args))

def raw_data(

self)

@immutable
def raw_data(self):
    self._iterator = lambda: self._raw_data()
    return self

def run(

self, sess, tensor, tensors={}, **feed)

def run(self, sess, tensor, tensors={}, **feed):
    try:
        tensor = tensor.tensor()
    except:
        pass
    feed = { feed[k]: self.sources[k] for k in feed }
    feed.update(tensors)
    return sess.run(tensor, feed_dict=feed)

def split(

self, *splits)

docstring for Batcher

def split(self, *splits):
    """docstring for Batcher"""
    data_length = len(self.x)
    indexes = range(data_length)
    random.shuffle(indexes)
    splits = [0] + list(splits)
    splits_total = sum(splits)
    return (
        query(splits)
        .scan()
        .select(lambda n: int(data_length * n / splits_total))
        .then(_window, n=2)
        .select(lambda (start, end): np.array(indexes[start:end]))
        .select(lambda split: Data(**{k: source[split,:] for (k, source) in self.sources.iteritems()}))
        .to_list()
    )