Browse Source

separated data visualization and analysis, added new data format for latter while (mostly) keeping old in former, also probably a bunch of fixes.

master
phryk 1 month ago
parent
commit
47eb89ca70
31 changed files with 2550 additions and 1648 deletions
  1. +200
    -103
      example.py
  2. +1
    -0
      poobrains/__init__.py
  3. +4
    -4
      poobrains/analysis/__init__.py
  4. +584
    -0
      poobrains/analysis/data.py
  5. +916
    -0
      poobrains/analysis/editor.py
  6. +1
    -3
      poobrains/analysis/geo.py
  7. +0
    -1486
      poobrains/analysis/plot.py
  8. +0
    -0
      poobrains/analysis/util.py
  9. +362
    -0
      poobrains/analysis/visualization.py
  10. +26
    -2
      poobrains/auth/__init__.py
  11. +1
    -1
      poobrains/form/__init__.py
  12. +31
    -14
      poobrains/form/fields.py
  13. +4
    -2
      poobrains/md/__init__.py
  14. +0
    -1
      poobrains/storage/__init__.py
  15. +301
    -0
      poobrains/svg.py
  16. +11
    -1
      poobrains/themes/default/dataset.jinja
  17. +9
    -0
      poobrains/themes/default/ephemeraldataset.jinja
  18. +4
    -8
      poobrains/themes/default/form/dataeditor.jinja
  19. +4
    -1
      poobrains/themes/default/form/fields/accessfield.jinja
  20. +4
    -1
      poobrains/themes/default/form/fields/checkbox.jinja
  21. +4
    -1
      poobrains/themes/default/form/fields/field.jinja
  22. +4
    -1
      poobrains/themes/default/form/fields/file.jinja
  23. +5
    -0
      poobrains/themes/default/form/fields/message.jinja
  24. +4
    -0
      poobrains/themes/default/form/fields/primarykeyfield.jinja
  25. +4
    -1
      poobrains/themes/default/form/fields/radio.jinja
  26. +4
    -1
      poobrains/themes/default/form/fields/select.jinja
  27. +4
    -1
      poobrains/themes/default/form/fields/textarea.jinja
  28. +4
    -0
      poobrains/themes/default/form/fieldset.jinja
  29. +51
    -13
      poobrains/themes/default/main.scss
  30. +1
    -1
      poobrains/themes/default/svg/barchart-inline.jinja
  31. +2
    -2
      poobrains/themes/default/svg/svg.jinja

+ 200
- 103
example.py View File

@@ -124,150 +124,245 @@ def list_nonexposed(listing):
return listing


@poobrains.analysis.plot.datasource(title='NASA Near Earth Orbits')
def neo_approaches(spkid: int = 2099942) -> dict: # default spkid for apophis
class NEO_Approaches(poobrains.analysis.EphemeralDataset):

try:
spkid = int(spkid)
except ValueError:
raise poobrains.errors.ValidationError("Invalid SPK ID; must be an integer.")
title='NASA Near Earth Orbits'

@classmethod
def load(cls, spkid: int = 2099942): # default spkid for apophis

try:
spkid = int(spkid)
except ValueError:
raise poobrains.errors.ValidationError("Invalid SPK ID; must be an integer.")

response = requests.get('https://api.nasa.gov/neo/rest/v1/neo/%d?api_key=DEMO_KEY' % spkid)

if response.status_code != 200:
raise poobrains.errors.ValidationError("NASA API responded with error code %d." % response.status_code)

data = json.loads(response.text)

ds = cls()
ds.title = "Close approaches of %s" % (data['name_limited'] if 'name_limited' in data else data['name'])
ds.description: "**%s** belongs to orbit class **%s**; %s" % (data['name'], data['orbital_data']['orbit_class']['orbit_class_type'], data['orbital_data']['orbit_class']['orbit_class_description'])

ds.plot_data = {
'kind': 'line',
'layers': {
'approaches': {
'x': 'time',
'y': 'distance'
}
}
}

ds['time'] = {
'title': 'Time',
'dtype': 'datetime64',
'color': None,
'observations': {},
}

response = requests.get('https://api.nasa.gov/neo/rest/v1/neo/%d?api_key=DEMO_KEY' % spkid)
ds['distance'] = {
'title': 'Distance (km)',
'dtype': 'float64',
'color': None,
'observations': {},
}

if response.status_code != 200:
raise poobrains.errors.ValidationError("NASA API responded with error code %d." % response.status_code)
for index, observation in enumerate(data['close_approach_data']):

data = json.loads(response.text)
observation_time = datetime.datetime.fromtimestamp(observation['epoch_date_close_approach'] / 1000)
observation_distance = float(observation['miss_distance']['kilometers'])

layer = {
'title': "Close approaches of %s" % (data['name_limited'] if 'name_limited' in data else data['name']),
'description': "**%s** belongs to orbit class **%s**; %s" % (data['name'], data['orbital_data']['orbit_class']['orbit_class_type'], data['orbital_data']['orbit_class']['orbit_class_description']),
'label_x': 'Time',
'label_y': 'Distance',
'type_x': 'datetime64',
'type_y': 'float64',
'color': None,
'points': [],
}
ds['time']['observations'][index] = {
'value': observation_time
}

for approach in data['close_approach_data']:
ds['distance']['observations'][index] = {
'value': observation_distance
}

x = datetime.datetime.fromtimestamp(approach['epoch_date_close_approach'] / 1000)
y = float(approach['miss_distance']['kilometers'])
return ds

layer['points'].append({
'x': x,
'y': y
})

return layer
class Stock_Weekly(poobrains.analysis.EphemeralDataset):

title = 'Stock (weekly)'

@poobrains.analysis.plot.datasource('Stock (weekly)')
def stock_weekly(symbol='FB'):
@classmethod
def load(cls, symbol='FB'):

response = requests.get('https://www.alphavantage.co/query?function=TIME_SERIES_WEEKLY&symbol=%s&apikey=%s' % (symbol, app.config['ALPHAVANTAGE_API_KEY']))
response = requests.get('https://www.alphavantage.co/query?function=TIME_SERIES_WEEKLY&symbol=%s&apikey=%s' % (symbol, app.config['ALPHAVANTAGE_API_KEY']))

if response.status_code != 200:
raise poobrains.errors.ValidationError("AlphaVantage API responded with error code %d." % response.status_code)
if response.status_code != 200:
raise poobrains.errors.ValidationError("AlphaVantage API responded with error code %d." % response.status_code)

data = json.loads(response.text)
data = json.loads(response.text)

dates = [x for x in data['Weekly Time Series'].keys()]
first_date = dates[-1]
last_date = dates[0]
dates = [x for x in data['Weekly Time Series'].keys()]
first_date = dates[-1]
last_date = dates[0]

ds = cls()
ds.title = symbol
ds.description = f"Data for stock symbol **{symbol}** from *{first_date}* to *{last_date}*."

layer = {
'title': symbol,
'description': f"Data for stock symbol **{symbol}** from *{first_date}* to *{last_date}*.",
'label_x': 'Time',
'label_y': 'Price ($)',
'type_x': 'float64',
'type_y': 'float64',
'color': None,
'points': []
}
for datestring, datapoint in data['Weekly Time Series'].items():
ds.plot_data = {
'kind': 'line',
'layers': {
symbol: {
'x': 'time',
'y': 'price'
}
}
}

y, m, d = [int(x) for x in datestring.split('-')]
date = datetime.datetime(y, m, d)
ds['time'] = {
'title': 'Time',
'description': f"Data for stock symbol **{symbol}** from *{first_date}* to *{last_date}*.",
'dtype': 'float64',
'color': None,
'observations': {}
}

x = date.timestamp()
y = float(datapoint['4. close'])
#error_lower = y - float(datapoint['3. low'])
#error_upper = y - float(datapoint['2. high'])
ds['price'] = {
'title': 'Price ($)',
'description': f"Data for stock symbol **{symbol}** from *{first_date}* to *{last_date}*.",
'dtype': 'float64',
'color': None,
'observations': {}
}

layer['points'].append({'x': x, 'y': y})
for index, (datestring, datapoint) in enumerate(data['Weekly Time Series'].items()):

return layer
y, m, d = [int(x) for x in datestring.split('-')]
date = datetime.datetime(y, m, d)

ds['time']['observations'][index] = {'value': date.timestamp()}
ds['price']['observations'][index] = {'value': float(datapoint['4. close'])}
#error_lower = y - float(datapoint['3. low'])
#error_upper = y - float(datapoint['2. high'])

return ds


class ConstrainedRandom(poobrains.analysis.EphemeralDataset):

title = 'Constrained random'

@poobrains.analysis.plot.datasource('Constrained random')
def constrained_random(magnitude: int = 2, length: int = 16) -> dict:
@classmethod
def load(cls, magnitude: int = 2, length: int = 16):

magnitude, length = int(magnitude), int(length)
magnitude, length = int(magnitude), int(length)

ranges = []
for i in range(0, magnitude):
ranges.append(sorted((random.random(), random.random())))
ranges = []
for i in range(0, magnitude):
ranges.append(sorted((random.random(), random.random())))

layer = {
'title': f"Constrained random ({magnitude}, {length})",
'description': f"ConstrainedRandom of magnitude {magnitude} and length {length}.",
'label_x': 'x',
'label_y': 'y',
'type_x': 'int64',
'type_y': 'float64',
'color': None,
'points': [],
}
ds = cls()
ds.title = f"Constrained random ({magnitude}, {length})"
ds.description = f"ConstrainedRandom of magnitude {magnitude} and length {length}."

for x in range(0, length):
ds.plot_data = {
'kind': 'line',
'layers': {
'plot': {
'x': 'x',
'y': 'y'
}
}
}

y = random.random()
for r in ranges:
ds['x'] = {
'title': "X",
'description': "Equidistant steps for X-axis.",
'dtype': 'int64',
'color': None,
'observations': {},
}

ds['y'] = {
'title': "Y",
'description': "Random values for Y-axis.",
'dtype': 'float64',
'color': None,
'observations': {},
}

for index, x in enumerate(range(0, length)):

y = random.random()
for r in ranges:
y = r[0] + (r[1] - r[0]) * y

r = ranges[round((len(ranges) - 1) * random.random())] # select random range
y = r[0] + (r[1] - r[0]) * y

r = ranges[round((len(ranges) - 1) * random.random())] # select random range
y = r[0] + (r[1] - r[0]) * y
y *= math.sin(2 * math.pi * float(i) / length)

ds['x']['observations'][index] = {'value': x}
ds['y']['observations'][index] = {'value': y}

return ds


y *= math.sin(2 * math.pi * float(i) / length)
class Sine(poobrains.analysis.EphemeralDataset):

layer['points'].append({'x': x, 'y': y})
title = 'Sine'

return layer
@classmethod
def load(cls, length: int=10):

ds = cls()
ds.title = f"Sine ({length})"
ds.description = f"A full sine wave out of {length} points."
ds.plot_data = {
'kind': 'line',
'layers': {
'plot': {
'x': 'x',
'y': 'y'
}
}
}

@poobrains.analysis.plot.datasource('Sine')
def sine(length: int=10) -> dict:
inc = (2 * math.pi) / (length - 1)

layer = {
'title': f"Sine ({length})",
'description': f"A full sine wave out of {length} points.",
'label_x': 'x',
'label_y': 'y',
'type_x': 'float64',
'type_y': 'float64',
'color': None,
'points': []
}
ds['x'] = {
'title': "X",
'description': f"x for `sin(x)`; spaced at equidistance intervals of {inc}.",
'dtype': 'float64',
'color': None,
'observations': {}
}

inc = (2 * math.pi) / (length - 1)
x = 0
for _ in range(0, length):
layer['points'].append({
'x': x,
'y': math.sin(x)
})
x += inc
ds['y'] = {
'title': "Y",
'description': "Result of `sin(x)`",
'dtype': 'float64',
'color': None,
'observations': {}
}

return layer
x = 0
for index in range(0, length):

ds['x']['observations'][index] = {'value': x}
ds['y']['observations'][index] = {'value': math.sin(x)}
x += inc

return ds


class RandomMap(poobrains.analysis.geo.EphemeralGeoData):

def fill(self):
@classmethod
def load(cls):

ds = cls()

data = poobrains.analysis.geo.geojson.MultiPolygon()

@@ -280,7 +375,9 @@ class RandomMap(poobrains.analysis.geo.EphemeralGeoData):

data.coordinates.append([points])

self.data = poobrains.analysis.geo.geojson.dumps(data)
ds.data = poobrains.analysis.geo.geojson.dumps(data)

return ds


if __name__ == '__main__':


+ 1
- 0
poobrains/__init__.py View File

@@ -585,6 +585,7 @@ class Poobrain(flask.Flask):
r = flask.send_from_directory(os.path.dirname(current_path), os.path.basename(current_path))

if r:
r.cache_control.private = True
r.cache_control.public = True
r.cache_control.max_age = app.config['CACHE_LONG']
return r


+ 4
- 4
poobrains/analysis/__init__.py View File

@@ -2,12 +2,12 @@

from poobrains import app

from . import base
from . import plot
from . import util
from . import data
from . import geo

EphemeralDataset = plot.EphemeralDataset
Dataset = plot.Dataset
EphemeralDataset = data.EphemeralDataset
Dataset = data.Dataset

EphemeralGeoData = geo.EphemeralGeoData
GeoData = geo.GeoData

+ 584
- 0
poobrains/analysis/data.py View File

@@ -0,0 +1,584 @@
# -*- coding: utf-8 -*-

import collections
import io
import re
import math
import time
import datetime
import json
import bson
import numpy
import pandas
import geopandas

from poobrains import Markup, app, abort, g, session, locked_cached_property

import poobrains.helpers
import poobrains.errors
import poobrains.form
import poobrains.storage
import poobrains.auth
import poobrains.commenting
import poobrains.svg

from . import util, visualization, editor


# Maps every supported dtype name to the class that values of that dtype are
# checked against. `EphemeralDataset.validate` uses it for `isinstance` checks
# and the data editor builds its dtype choices from the keys.
__types__ = {
'int32': numpy.int32,
'int64': numpy.int64,
'uint32': numpy.uint32,
'uint64': numpy.uint64,
'float32': numpy.float32,
'float64': numpy.float64,
'datetime64': numpy.datetime64,
'geometry': geopandas.base.BaseGeometry,
}


# !! ANY TYPE ADDED TO ONE OF THE __casts_* DICT NEEDS TO BE ADDED TO ALL OTHERS AS WELL !!
# Every table below is keyed by dtype name (the same keys as `__types__`) and
# maps to a callable converting a single value.

# value -> string; looked up by `EphemeralDataset.serialize`.
# NOTE(review): `geojson` is not among the imports at the top of this module,
# so the 'geometry' casts here and in `__casts_string_numpy__` look like they
# would raise NameError when called - confirm.
__casts_numpy_string__ = {
'int32': str,
'int64': str,
'uint32': str,
'uint64': str,
'float32': str,
'float64': str,
'datetime64': lambda x: x.item().isoformat(),
'geometry': lambda x: geojson.dumps(x),
}

# string -> numpy value; looked up by `EphemeralDataset.deserialize_data`.
# 'datetime64' expects an ISO 8601 string as accepted by `datetime.fromisoformat`.
__casts_string_numpy__ = {
'int32': numpy.int32,
'int64': numpy.int64,
'uint32': numpy.uint32,
'uint64': numpy.uint64,
'float32': numpy.float32,
'float64': numpy.float64,
'datetime64': lambda x: numpy.datetime64(datetime.datetime.fromisoformat(x)),
'geometry': lambda x: geojson.loads(x),
}

# to cast data from json in db to proper builtin types
# 'datetime64' interprets the value as a POSIX timestamp.
# None marks that no cast is defined for 'geometry'.
__casts_float_builtin__ = {
'int32': int,
'int64': int,
'uint32': int,
'uint64': int,
'float32': float,
'float64': float,
'datetime64': lambda x: datetime.datetime.fromtimestamp(float(x)),
'geometry': None,

}


# for casting python builtin types to pandas/numpy types;
# looked up by `EphemeralDataset.column_to_pandas`.
# None marks that no cast is defined for 'geometry'.
__casts_builtin_pandas__ = {
'int32': numpy.int32,
'int64': numpy.int64,
'uint32': numpy.uint32,
'uint64': numpy.uint64,
'float32': numpy.float32,
'float64': numpy.float64,
'datetime64': numpy.datetime64,
'geometry': None,
}


def load_dataset(handle):

    """
    Resolve a dataset handle to a dataset object.

    * `handle`: `str`. A handle starting with an underscore addresses a
      dynamic data source as `_<name>[.<argument>…]`; any other handle is
      loaded as a stored `Dataset` and checked for 'read' permission
      against `g.user`.

    Raises `poobrains.errors.ExposedError` for an unknown dynamic source or
    when more arguments are passed than the source has parameters.
    """

    if not handle.startswith('_'):

        stored = Dataset.load(handle)
        stored.permissions['read'].check(g.user)
        return stored

    # underscore marks dynamic use of data source in URLs
    prefixed_name, *args = handle.split('.')
    name = prefixed_name[1:] # remove leading underscore

    # NOTE(review): `datasources` is neither defined nor imported in the
    # visible part of this module - confirm where it is supposed to come from.
    if not name or name not in datasources:
        raise poobrains.errors.ExposedError('Unknown dynamic dataset: %s' % name)

    source = datasources[name]

    ds = EphemeralDataset(name)
    ds.title = source['title']

    parameters = {}

    if len(args) > len(source['parameters']):
        raise poobrains.errors.ExposedError(f"Too many parameters for data source {name}. Got {len(args)}, expected up to {len(source['parameters'])}.")

    # positional arguments are matched to the declared parameters in order
    for arg, (param_name, info) in zip(args, source['parameters'].items()):
        parameters[param_name] = info['type'](arg) # WARNING: This might be exploitable due to insufficient input sanitation

    ds.data[name] = source['function'](**parameters)

    return ds


class EphemeralDataset(poobrains.auth.Protected):

    """
    In-memory, column oriented dataset.

    `.data` maps column names to column dicts with the keys 'title',
    'description', 'dtype', 'color' and 'observations'. 'observations' maps
    an index to an observation dict holding at least the key 'value'.
    `.plot_data` is a dict with the keys 'kind' and 'layers', where 'layers'
    maps layer names to dicts naming the columns to plot ('x', 'y').
    """

    class Meta:

        modes = collections.OrderedDict([
            ('teaser', 'read'),
            ('inline', 'read'),
            ('json', 'read'),
            ('raw', 'read'),
            ('full', 'read'),
        ])

    title = None # to override Renderable's @property
    description = poobrains.md.MarkdownString()

    def __init__(self, name=None, title=None, description=None, data=None):

        self._name = name
        self.title = title or type(self).__name__
        self.description = description or ''
        self.plot_data = {'kind': 'scatter', 'layers': {}}
        self.data = data if data is not None else {}

    def __len__(self):
        return len(self.data)

    def __getitem__(self, column_name):
        return self.data[column_name]

    def __setitem__(self, column_name, column):
        # was `self.data[idx]`, which raised NameError on every assignment
        self.data[column_name] = column

    def __delitem__(self, column_name):

        """ Delete a column and every plot layer referencing it. """

        del(self.data[column_name])

        if isinstance(self.plot_data, dict):

            # collect first, the layer dict must not change size while iterated
            layers_to_delete = []
            for layer_name, layer_info in self.plot_data['layers'].items():
                if column_name in layer_info.values():
                    layers_to_delete.append(layer_name)

            for layer_name in layers_to_delete:
                del(self.plot_data['layers'][layer_name])

    def __iter__(self):
        return self.data.__iter__()

    def __contains__(self, item):
        return item in self.data

    def items(self):
        return self.data.items()

    def values(self):
        return self.data.values()

    def keys(self):
        return self.data.keys()

    @property
    def name(self):
        return self._name or type(self).__name__.lower()

    @name.setter
    def name(self, value):
        self._name = value

    @locked_cached_property
    def ref_id(self):
        return "dataset-%s" % self.name

    @locked_cached_property
    def empty(self):

        """ `True` if no column holds any observation. """

        # iterate the column dicts; iterating `self` yields column *names*
        return 0 == sum(len(column['observations']) for column in self.values())

    def validate(self):

        """ raise ValueError if data is malformed. """

        required_keys = {'title', 'description', 'dtype', 'color', 'observations'}
        description_keys = {'title', 'text'}

        for name, column in self.items():

            if not isinstance(column, dict):
                raise ValueError(f"Dataset '{self.name}': column '{name}' must be dict, but is {type(column).__name__}.")

            actual_keys = set(column)

            missing_keys = required_keys - actual_keys
            if missing_keys:
                raise ValueError(f"Dataset '{self.name}': Key '{sorted(missing_keys)[0]}' missing in column '{name}'.")

            invalid_keys = actual_keys - required_keys
            if invalid_keys:
                raise ValueError(f"Dataset '{self.name}': Invalid key in column '{name}': {invalid_keys}.")

            if column['dtype'] not in __types__:
                raise ValueError(f"Dataset '{self.name}': Unknown dtype '{column['dtype']}' in column '{name}'.")

            if not isinstance(column['observations'], dict):
                raise ValueError(f"Dataset '{self.name}': data[{name}]['observations'] must be dict, but is {type(column['observations']).__name__}.")

            expected_type = __types__[column['dtype']]

            for index, observation in column['observations'].items():

                if not isinstance(observation, dict):
                    raise ValueError(f"Dataset '{self.name}': Observations must be of type dict, but data['{name}']['observations'][{index}] is {type(observation).__name__}.")

                if 'value' not in observation:
                    raise ValueError(f"Dataset '{self.name}': Key 'value' missing in Observation: data['{name}']['observations'][{index}].")

                if not isinstance(observation['value'], expected_type):
                    raise ValueError(f"Dataset '{self.name}': Type mismatch in data['{name}']['observations'][{index}]['value'] must be {column['dtype']}, but is {type(observation['value']).__name__}.")

                if 'description' in observation:

                    description = observation['description']

                    if not isinstance(description, dict):
                        raise ValueError(f"Dataset '{self.name}': data['{name}']['observations'][{index}]['description'] must be dict, but is {type(description).__name__}.")

                    missing_keys = description_keys - set(description)
                    if missing_keys:
                        raise ValueError(f"Dataset '{self.name}': Key '{sorted(missing_keys)[0]}' missing in data['{name}']['observations'][{index}]['description'].")

                    invalid_keys = set(description) - description_keys
                    if invalid_keys:
                        raise ValueError(f"Dataset '{self.name}': Invalid key in data['{name}']['observations'][{index}]['description']: {invalid_keys}.")

    def add_plot_layer(self, x=None, y=None):

        """
        Add a plot layer for column `y`, optionally plotted against column `x`.

        The layer is named `y` if no `x` is given, else `<x>-<y>`.
        Raises `ValueError` if `y` is `None`.
        """

        if y is None:
            raise ValueError("add_plot_layer parameter 'y' must be passed and must not be None.")

        if x is None:
            self.plot_data['layers'][y] = {
                'y': y
            }

        else:
            self.plot_data['layers'][f'{x}-{y}'] = {
                'x': x,
                'y': y,
            }

    def column_index(self, column_name):

        """ List of the observation indices present in column `column_name`. """

        # was iterating the column dict itself, yielding 'title', 'dtype', …
        return list(self[column_name]['observations'])

    def complete_index(self):

        """ List of every observation index occurring in any column, without duplicates. """

        index = set()
        for column in self.values():
            for idx in column['observations']:
                index.add(idx)

        # set iteration order is arbitrary, so the order of the result is not guaranteed
        return list(index)

    @classmethod
    def deserialize(cls, raw, format='json'):

        """
        Build a dataset from the output of `serialize(whole=True)`.

        * `raw`: serialized dataset.
        * `format`: `'json'` or `'bson'`; anything else raises `ValueError`.
        """

        if format == 'json':
            deserializer = json.loads
        elif format == 'bson':
            deserializer = bson.loads
        else:
            raise ValueError(f"Invalid serialization format: {format}, must be 'json' or 'bson'")

        rough_cut = deserializer(raw)

        ds = cls()
        ds.title = rough_cut['title']
        ds.description = poobrains.md.MarkdownString(rough_cut['description'])
        ds.plot_data = rough_cut['plot_data']
        ds.data = cls.deserialize_data(rough_cut['data'])

        return ds

    @classmethod
    def deserialize_data(cls, rough_data): #TODO: better function name

        """
        Turn already parsed (JSON/BSON) column data back into proper column
        dicts: observation indices become `int`, values are cast according
        to the column's dtype and RGB sequences become color objects.
        """

        data = {}
        for column_name, rough_column in rough_data.items():

            color = rough_column['color']
            if isinstance(color, (tuple, list)):
                # JSON has no tuples, a serialized color arrives as list
                color = poobrains.svg.Color.from_tuplergb(tuple(color))

            observations = {}
            cast = __casts_string_numpy__[rough_column['dtype']]

            for str_idx, rough_observation in rough_column['observations'].items():

                observation = {
                    'value': cast(rough_observation['value']),
                }

                if 'title' in rough_observation:
                    observation['title'] = rough_observation['title']
                    observation['description'] = rough_observation['description']

                observations[int(str_idx)] = observation

            data[column_name] = {
                'title': rough_column['title'],
                'description': rough_column['description'],
                'dtype': rough_column['dtype'],
                'color': color,
                'observations': observations
            }

        return data

    def serialize(self, format='json', whole=False):

        """
        * `format`: `'json'` or `'bson'`; anything else raises `ValueError`.
        * `whole`: `bool`, whether to serialize the whole dataset or just its `.data`.
        """

        if format == 'json':
            serializer = json.dumps
        elif format == 'bson':
            serializer = bson.dumps
        else:
            raise ValueError(f"Invalid serialization format: {format}, must be 'json' or 'bson'")

        prepro_data = {}
        for column_name, column in self.data.items():

            prepro_column = {}
            for key in column:
                if key == 'color' and column[key] is not None:
                    prepro_column[key] = column[key].tuple_rgb()
                elif key != 'observations':
                    prepro_column[key] = column[key]

            cast = __casts_numpy_string__[column['dtype']]

            prepro_observations = {}
            for index, observation in column['observations'].items():

                prepro_observations[index] = {
                    'value': cast(observation['value'])
                }

            prepro_column['observations'] = prepro_observations
            prepro_data[column_name] = prepro_column

        if whole:
            return serializer({
                'title': self.title,
                'description': str(self.description),
                'data': prepro_data,
                'plot_data': self.plot_data
            })

        return serializer(prepro_data)

    def column_to_pandas(self, column_name):

        """ Observations of column `column_name` as `geopandas.GeoSeries`, indexed like the column. """

        column = self[column_name]
        data = {}

        cast = __casts_builtin_pandas__[column['dtype']]

        for index, observation in column['observations'].items():
            # no cast is defined for some dtypes (None in the table); pass those values through
            data[index] = observation['value'] if cast is None else cast(observation['value'])

        return geopandas.GeoSeries(data)

    def to_pandas(self):

        """ Whole dataset as `geopandas.GeoDataFrame` with one series per column. """

        data = {}
        for column_name in self:
            data[column_name] = self.column_to_pandas(column_name)

        return geopandas.GeoDataFrame(data)

    def update_from_dataframe(self, dataframe):

        """
        Replace the observations of all columns with the contents of
        `dataframe` and delete columns not present in it.
        """

        for name, series in dataframe.items():
            self.update_column_from_series(name, series)

        # iterate a copy, deleting changes the size of the underlying dict
        for name in list(self):
            if name not in dataframe:
                del(self[name])

    def update_column_from_series(self, column_name, series):

        """
        Rebuild the existing column `column_name` from a pandas series.

        Title, description and color of the column are kept, the dtype is
        taken from the series. Missing values (NaN/NaT/None) are skipped.
        Title and description of an observation are carried over if its
        index already existed.
        """

        old_column = self[column_name]
        new_column = {
            'title': old_column['title'],
            'description': old_column['description'],
            'color': old_column['color'],
            'dtype': series.dtype.name,
            'observations': {},
        }

        for index, value in series.items():

            # pandas.isna also copes with non-float values, math.isnan raises on those
            if pandas.isna(value):
                continue

            new_column['observations'][index] = {
                'value': value,
            }

            if index in old_column['observations'] and 'title' in old_column['observations'][index]:
                new_column['observations'][index]['title'] = old_column['observations'][index]['title']
                new_column['observations'][index]['description'] = old_column['observations'][index]['description']

        self[column_name] = new_column

    def clear(self):
        self.data.clear()

    def render(self, mode=None):

        if mode == 'json':
            return Markup(self.serialize(format='json'))

        return super(EphemeralDataset, self).render(mode=mode)

    @property
    def plottable(self):
        return isinstance(self.plot_data, dict) and 'kind' in self.plot_data and 'layers' in self.plot_data and len(self.plot_data['layers']) > 0

    def plot(self, kind=None, layers=None):

        """
        Create a plot of this dataset.

        * `kind`: key into `visualization.plot_kinds`, defaults to `.plot_data['kind']`.
        * `layers`: layer definitions, defaults to `.plot_data['layers']`.

        Raises `ValueError` for an unknown kind or if neither `.plot_data`
        nor the passed parameters supply kind and layers.
        """

        if not isinstance(self.plot_data, dict) and (kind is None or layers is None):
            raise ValueError("No valid plot data and not enough passed information (kind, layers).")

        if kind is None:
            kind = self.plot_data['kind']

        if kind not in visualization.plot_kinds:
            raise ValueError('Unknown plot kind: %s' % kind)

        if layers is None:
            layers = self.plot_data['layers']

        # passed layers used to hit an undefined name and were then ignored
        return visualization.plot_kinds[kind](dataset=self, layers=layers)

    def table(self, max_rows=4):

        """ Table of the first `max_rows` rows; cells without observation stay empty. """

        t = poobrains.rendering.Table(columns=self.keys())
        for index in self.complete_index()[:max_rows]:
            row_data = []
            for column in self.values():
                if index in column['observations']:
                    value = column['observations'][index]['value']
                else:
                    value = ""

                row_data.append(value)
            t.append(*row_data)

        return t

    def save(self, name=None, owner=None):

        """
        Convert this EphemeralDataset into a Dataset and save it.

        * `name`: name for the stored dataset, defaults to `<name>-<unix timestamp>`.
        * `owner`: owning user, defaults to `g.user`.
        """

        ds = Dataset()

        #ds.owner = owner or poobrains.auth.User.get(poobrains.auth.User.id == 1)
        ds.owner = owner or g.user
        now = int(time.time())
        ds.name = name or poobrains.helpers.clean_string("%s-%d" % (self.name, now))
        ds.title = '%s@%s' % (self.title, str(datetime.datetime.fromtimestamp(now)))
        ds.description = self.description
        ds.data = self.data
        ds.plot_data = self.plot_data # keep plot configuration, it used to be dropped here
        ds.save(force_insert=True)

        return ds


class Dataset(EphemeralDataset, poobrains.commenting.Commentable):

    """
    Stored dataset.

    `data` and `plot_data` are kept as JSON text in the database fields.
    Attribute access transparently hands out the deserialized objects,
    which are cached in `_data` and `_plot_data`.
    """

    class Meta:

        modes = collections.OrderedDict([
            ('add', 'create'),
            ('teaser', 'read'),
            ('inline', 'read'),
            ('full', 'read'),
            ('edit', 'update'),
            ('delete', 'delete')
        ])

    title = poobrains.storage.fields.CharField()
    description = poobrains.md.MarkdownField(null=True)
    data = poobrains.storage.fields.TextField(form_widget=None) # TextField for JSON, TODO: use BlobField and BSON
    plot_data = poobrains.storage.fields.TextField(form_widget=None, null=True)
    lock = poobrains.storage.fields.ForeignKeyField(poobrains.storage.SessionData, null=True, on_delete="SET NULL")

    form_add = editor.DataEditor
    form_edit = editor.DataEditor

    def __init__(self, *args, **kwargs):

        self._data = None
        self._plot_data = None
        # start the lookup *after* EphemeralDataset, so its __init__ is skipped
        super(EphemeralDataset, self).__init__(*args, **kwargs)

    def __getattribute__(self, name):

        if name == 'data':
            if self._data is None:
                if 'data' in self.__data__:

                    # deserialize JSONified data
                    raw = json.loads(self.__data__['data'])
                    self._data = self.__class__.deserialize_data(raw)

                else:
                    self.data = {} # triggers self.__setattr__(self, 'data',…) which sets self._data

            return self._data

        elif name == 'plot_data':
            if self._plot_data is None:
                if 'plot_data' in self.__data__ and self.__data__['plot_data'] is not None:
                    self._plot_data = json.loads(self.__data__['plot_data'])
                else:
                    self._plot_data = {}

            return self._plot_data

        return super(Dataset, self).__getattribute__(name)

    def __setattr__(self, name, value):

        if name == 'data':

            if not isinstance(value, str):
                # keep the object as cache and hand the serialized form to the field
                self._data = value
                #value = json.dumps(value)
                value = self.serialize()

        elif name == 'plot_data':

            if not isinstance(value, str):
                self._plot_data = value
                value = json.dumps(value)

        super(Dataset, self).__setattr__(name, value)

    @locked_cached_property
    def ref_id(self):

        return "dataset-%s" % self.name

    def save(self, **kwargs):

        """
        Store the dataset.

        The cached objects may have been modified in place since they were
        assigned, so both are serialized again before writing.
        """

        #dataobj = self.data
        self.__data__['data'] = self.serialize()

        # only touch the stored value if plot data was ever loaded or assigned
        if self._plot_data is not None:
            self.__data__['plot_data'] = json.dumps(self._plot_data)

        r = super(EphemeralDataset, self).save(**kwargs)
        #self.data = dataobj
        return r

+ 916
- 0
poobrains/analysis/editor.py View File

@@ -0,0 +1,916 @@
# -*- coding: utf-8 -*-

import math
import re

import poobrains

from poobrains import app, flash, g, new_session, redirect, session


def plot_kind_choices():

    """ Choices for plot kind selects: `(kind, class name)` for every entry of `visualization.plot_kinds`. """

    choices = []
    for kind, plot_class in poobrains.analysis.visualization.plot_kinds.items():
        choices.append((kind, plot_class.__name__))

    return choices

def dtype_choices():

    """ Choices for dtype selects: every dtype name of `data.__types__`, used as value and as label. """

    choices = []
    for dtype_name in poobrains.analysis.data.__types__:
        choices.append((dtype_name, dtype_name))

    return choices

def datasource_choices():

    """
    Choices for data source selects.

    Offers every non-stored `EphemeralDataset` subclass the current user
    (`g.user`) may read as `(class, label)`. The label is the title of the
    class, falling back to its key if no title is set.
    """

    #return [(name, source['title']) for name, source in poobrains.analysis.data.datasources.items()]

    choices = []
    for key, cls in poobrains.analysis.data.EphemeralDataset.class_children_keyed().items():

        if issubclass(cls, poobrains.analysis.data.Dataset):
            continue # stored datasets are not offered here

        try:
            cls.permissions['read'].check(g.user)

        except poobrains.auth.AccessDenied:
            continue # skip EphemeralDataset subclasses the current user is not allowed to read.

        # EphemeralDataset itself defines `title = None`, so checking for mere
        # existence of the attribute never reached the fallback to `key`
        title = getattr(cls, 'title', None)
        choices.append((cls, title if title else key))

    # TODO: StoredDataset
    return choices


def validate_handle_free(handle):

    """
    Form validator making sure `handle` can be used for a new dataset.

    Only handles with an editor session are looked at: if exactly one stored
    dataset of that name exists, `poobrains.errors.ValidationError` is
    raised, otherwise the editor session for the handle is dropped.
    """

    if handle not in session['editor-sessions']:
        return

    Dataset = poobrains.analysis.data.Dataset
    matches = Dataset.select().where(Dataset.name == handle).count()

    if matches == 1:
        raise poobrains.errors.ValidationError("Dataset named '%s' already exists!" % handle)

    del(session['editor-sessions'][handle]) # FIXME: should technically be in a different function, i guess


@new_session
def editor_session(session):

    """ Registered through `new_session`; gives the passed session an empty 'editor-sessions' dict. """

    session['editor-sessions'] = {}


class EditorPlotActionControl(poobrains.form.Fieldset):

    """
    Fieldset for plot related editor actions: choosing the plot kind and
    adding, editing or deleting plot layers. Which fields are shown depends
    on the action stored in the editor session.
    """

    title = "Plot actions"

    def __init__(self, editor, **kwargs):
        super(EditorPlotActionControl, self).__init__(**kwargs)

        self.editor = editor

        fields = poobrains.form.fields
        Button = poobrains.form.Button
        current_action = editor.session['action']

        self.kind = fields.Select(choices=plot_kind_choices(), default=editor.dataset.plot_data['kind'])
        self.kind_apply = Button('submit', label='Apply plot type')
        self.cancel = Button('submit', label='Cancel')

        if current_action == 'plot.add':
            self['layer_name'] = fields.Text(label="Name", help_text='machine readable ([a-z], [0-9], -)', validators=[poobrains.form.validators.valid_name])
            self['layer_x'] = fields.Select(label="X", help_text="Which column to use as X-axis.", choices=self.editor.column_choices)
            self['layer_y'] = fields.Select(label="Y", help_text="Which column to use as Y-axis.", choices=self.editor.column_choices)
            self.add = Button('submit', label='Add')

        elif current_action == 'plot.edit':
            self['layer_name'] = fields.Select(label="Layer", choices=self.editor.layer_choices)
            self.edit_choose = Button('submit', label='Choose')

        elif current_action == 'plot.edit.layer':
            chosen_name = self.editor.session['action_data']['layer_name']
            chosen_layer = self.editor.dataset.plot_data['layers'][chosen_name]
            self['layer_name'] = fields.Select(label="Layer", choices=self.editor.layer_choices, default=chosen_name)
            self['layer_x'] = fields.Select(label="X", help_text="Which column to use as X-axis.", choices=self.editor.column_choices, default=chosen_layer['x'])
            self['layer_y'] = fields.Select(label="Y", help_text="Which column to use as Y-axis.", choices=self.editor.column_choices, default=chosen_layer['y'])
            self.edit_save = Button('submit', label='Save')

        elif current_action == 'plot.delete':
            self['layers'] = fields.Select(label="Layer(s)", choices=self.editor.layer_choices, multi=True)
            self.delete = Button('submit', label='Delete')

        else:
            # no action in progress: offer the choice of one, nothing to cancel
            self.action = fields.Select(label='Action', choices=(('add', 'Add'), ('edit', 'Edit'), ('delete', 'Delete')))
            self.action_choose = Button('submit', label='Choose')
            del(self.controls['cancel'])

    def process(self, submit):

        """
        Apply the submitted button `submit` to the edited dataset and the
        editor session, then redirect back to the edit view of the instance.
        """

        def reset_action():
            # leave the current action, back to the action chooser
            self.editor.session['action'] = None
            self.editor.session['action_data'] = None

        if submit == 'cancel':
            reset_action()
            flash("Cancelled action.")

        elif submit == 'kind_apply':
            self.editor.dataset.plot_data['kind'] = self['kind'].value
            flash(f"Set plot type to '{self['kind'].value}'.")

        elif submit == 'action_choose':
            self.editor.session['action'] = f"plot.{self['action'].value}"

        elif submit == 'add':
            new_layer = {
                'x': self['layer_x'].value,
                'y': self['layer_y'].value
            }
            self.editor.dataset.plot_data['layers'][self['layer_name'].value] = new_layer

            flash(f"Successfully added layer '{self['layer_name'].value}'.")
            reset_action()

        elif submit == 'edit_choose':
            self.editor.session['action'] = 'plot.edit.layer'
            self.editor.session['action_data'] = {
                'layer_name': self['layer_name'].value,
            }
            flash(f"Now editing layer '{self['layer_name'].value}'.")

        elif submit == 'edit_save':
            old_name = self.editor.session['action_data']['layer_name']
            new_name = self['layer_name'].value

            if old_name != new_name:
                # move the layer to its new name before modifying it
                self.editor.dataset.plot_data['layers'][new_name] = self.editor.dataset.plot_data['layers'][old_name]
                del(self.editor.dataset.plot_data['layers'][old_name])
                flash(f"Renamed layer '{old_name}' to '{new_name}'.")

            self.editor.dataset.plot_data['layers'][new_name]['x'] = self['layer_x'].value
            self.editor.dataset.plot_data['layers'][new_name]['y'] = self['layer_y'].value

            flash("Saved layer modifications.")

        elif submit == 'delete':
            n = 0
            for doomed_name in self['layers'].value:
                del(self.editor.dataset.plot_data['layers'][doomed_name])
                n += 1

            flash(f"Deleted {n} layers.")
            reset_action()

        return redirect(self.editor.instance.url('edit'))

class EditorDatasetNew(poobrains.form.Fieldset):
    """Fieldset asking for a handle and saving the editor's instance under it."""

    title = "Add new dataset"
    handle = poobrains.form.fields.Text(required=True, validators=[validate_handle_free])
    new_session = poobrains.form.Button(label='Create', type='submit')

    def __init__(self, editor, **kwargs):
        """Remember the owning `editor`; `kwargs` go to the base fieldset."""

        super().__init__(**kwargs)
        self.editor = editor

    def process(self, submit):
        """Name the instance after the entered handle, save it and go to its edit page."""

        chosen = self['handle'].value
        target = self.editor.instance

        target.owner = g.user
        target.name = chosen
        # The title starts out as the capitalized handle.
        target.title = chosen.capitalize()
        target.save()

        return redirect(target.url('edit'))


class EditorDatasetEdit(poobrains.form.Fieldset):
    """Fieldset for changing title and description of the edited dataset."""

    def __init__(self, editor, **kwargs):
        """Create the title/description fields pre-filled from `editor.dataset`."""

        super().__init__(**kwargs)

        self.editor = editor
        current = self.editor.dataset
        self['dataset_title'] = poobrains.form.fields.Text(label='Title', default=current.title)
        self['dataset_description'] = poobrains.form.fields.TextArea(label='Description', default=current.description)
        self.cancel = poobrains.form.Button('submit', label='Cancel')
        self.apply = poobrains.form.Button('submit', label='Apply')

    def _leave_action(self):
        # Forget the current action and its data in the editor session.
        self.editor.session['action'] = None
        self.editor.session['action_data'] = None

    def process(self, submit):
        """Apply or discard the entered values, then redirect to the edit page."""

        if submit == 'cancel':
            self._leave_action()
            flash("Cancelled action.")

        elif submit == 'apply':
            target = self.editor.dataset
            target.title = self['dataset_title'].value
            target.description = self['dataset_description'].value

            flash("Updated dataset base info.")
            self._leave_action()

        return redirect(self.editor.instance.url('edit'))



class EditorColumnAdd(poobrains.form.Fieldset):
    """Fieldset adding a new, empty column to the dataset being edited."""

    title = "Add new column"
    column_name = poobrains.form.fields.Text(label="Name", help_text='machine readable ([a-z], [0-9], -)', validators=[poobrains.form.validators.valid_name])
    column_title = poobrains.form.fields.Text(label="Title")
    column_dtype = poobrains.form.fields.Select(label="Data type", choices=dtype_choices)
    column_description = poobrains.form.fields.TextArea(label="Description")
    cancel = poobrains.form.Button('submit', label='Cancel')
    add = poobrains.form.Button('submit', label='Add')

    def __init__(self, editor, **kwargs):
        """Remember the owning `editor`; `kwargs` go to the base fieldset."""

        super().__init__(**kwargs)
        self.editor = editor

    def validate(self, submit):
        """Check name and data type unless the action is being cancelled.

        Raises:
            poobrains.errors.CompoundError: wrapping one ValidationError per
                problem found; each error is also attached to its field.
        """

        if submit == 'cancel':
            return

        errors = []
        name = self['column_name'].value

        if name is None:
            e = poobrains.errors.ValidationError("Need to set a column name.")
            self['column_name'].errors.append(e)
            errors.append(e)

        # The working copy of the data lives in editor.dataset; the editor
        # session only carries 'action', 'action_data' and 'dataset'.
        elif name in self.editor.dataset:
            e = poobrains.errors.ValidationError(f"Column already exists: '{name}'")
            self['column_name'].errors.append(e)
            errors.append(e)

        # Checked independently so a missing dtype is reported even when the
        # name is fine (and alongside a name error).
        if self['column_dtype'].value is None:
            e = poobrains.errors.ValidationError("Need to choose a column data type.")
            self['column_dtype'].errors.append(e)
            errors.append(e)

        if errors:
            raise poobrains.errors.CompoundError(errors)

    def process(self, submit):
        """Create the column (or cancel) and redirect to the edit page."""

        if submit == 'cancel':
            self.editor.session['action'] = None
            self.editor.session['action_data'] = None
            flash("Cancelled action.")

        elif submit == 'add':

            name = self['column_name'].value
            self.editor.dataset[name] = {
                'title': self['column_title'].value or name,
                'description': self['column_description'].value,
                'dtype': self['column_dtype'].value,
                'color': None,  # no custom color until one is set via "Edit column info"
                'observations': {},
            }

            self.editor.session['action'] = None
            self.editor.session['action_data'] = None
            flash(f"Created column '{name}'.")

        return redirect(self.editor.instance.url('edit'))


class EditorDatasetJoin(poobrains.form.Fieldset):
    """Multi-step fieldset joining another dataset into the edited one.

    The current step is read from the editor session's ``action``:
    ``join`` (choose a source), ``join.parameters`` (enter arguments for the
    source's ``load``) and ``join.finalize`` (choose how to join the loaded,
    serialized dataset stored in ``action_data['other']``).
    """

    title = "Join dataset"
    cancel = poobrains.form.Button('submit', label='Cancel')

    def __init__(self, editor, **kwargs):
        """Build the fields belonging to the current join step."""

        super().__init__(**kwargs)

        self.editor = editor
        step = self.editor.session['action']
        step_data = self.editor.session['action_data']

        if step == 'join.finalize':

            self.other = poobrains.analysis.data.EphemeralDataset.deserialize(step_data['other'])
            if self.other.plottable:
                self.pre = self.other.plot()

            self['how'] = poobrains.form.fields.Select(
                label='Join operation',
                choices=(('left', 'Left'), ('right', 'Right'), ('outer', 'Outer'), ('inner', 'Inner')),
                default='left',
                help_text="""
* **left**: use calling frame’s index (or column if on is specified)
* **right**: use *other*’s index.
* **outer**: form union of calling frame’s index (or column if on is specified) with *other*’s index, and sort it. lexicographically.
* **inner**: form intersection of calling frame’s index (or column if on is specified) with other’s index, preserving the order of the calling’s one.

This text is ripped straight out of the pandas documentation and not really helpful. *We're sorry*. ¯\\\\_(ツ)\_/¯
PS: If you just want to add some data, go for 'left'.
"""
            )
            self['on'] = poobrains.form.fields.Select(
                label="On",
                empty_label="Use index",
                choices=self.editor.column_choices,
                help_text="Column used to identify matching rows by."
            )

            self['lsuffix'] = poobrains.form.fields.Text(label='Left suffix')
            self['rsuffix'] = poobrains.form.fields.Text(label='Right suffix')
            self.join = poobrains.form.Button('submit', label='Join')

        else:
            self['source'] = poobrains.form.fields.Select(choices=datasource_choices, type=poobrains.form.types.StorableParamType(poobrains.analysis.EphemeralDataset))
            self.load = poobrains.form.Button('submit', label='Load')

            if step_data is not None:
                # Re-select a source that was chosen in an earlier step.
                if 'source' in step_data:
                    self['source'].value = step_data['source']

                if step == 'join.parameters':
                    self['source'].readonly = True

            if step == 'join.parameters':
                self['source_parameters'] = self.editor.summon_action_parameter_fieldset(step_data['source'])

    def validate(self, submit):
        """Require a chosen source whenever the source field is present.

        Raises:
            poobrains.errors.CompoundError: if a needed source is missing.
        """

        if submit == 'cancel':
            return

        errors = []
        if 'source' in self and self['source'].value is None:
            e = poobrains.errors.ValidationError("Need to choose a data source")
            self['source'].errors.append(e)
            errors.append(e)

        if len(errors):
            raise poobrains.errors.CompoundError(errors)

    def process(self, submit):
        """Advance, cancel or finish the join, then redirect to the edit page."""

        session = self.editor.session
        step = session['action']

        if submit == 'cancel':
            if step in ('join.parameters', 'join.finalize'):
                # Cancelling a later step only goes back to source selection.
                session['action'] = 'join'
                flash("Going back.")

            else:
                session['action'] = None
                session['action_data'] = None
                flash("Cancelled action.")

        elif step == 'join':

            source = self['source'].value
            source_parameters = poobrains.helpers.function_parameters(source.load)

            if len(source_parameters) > 0:
                # load() needs arguments, ask for them first.
                session['action'] = 'join.parameters'
                session['action_data'] = {
                    'source': source,
                }

            else:
                ds = source.load()
                session['action'] = 'join.finalize'
                session['action_data']['other'] = ds.serialize(format='json', whole=True)

        elif step == 'join.parameters':

            source = self['source'].value
            source_parameters = poobrains.helpers.function_parameters(source.load)
            parameter_fields = self['source_parameters']

            parameters = {
                param_name: parameter_fields[param_name].value
                for param_name in source_parameters
                if param_name in parameter_fields
            }

            if len(parameters) != len(source_parameters):
                flash("Internal bleeding detected.", 'error')

            else:
                other = source.load(**parameters)

                session['action'] = 'join.finalize'
                session['action_data'] = {
                    'other': other.serialize(format='json', whole=True),
                }

        elif step == 'join.finalize':

            other = poobrains.analysis.data.EphemeralDataset.deserialize(session['action_data']['other'])
            try:

                kw = {
                    'other': other.to_pandas(),
                    'how': self['how'].value,
                    'on': self['on'].value
                }

                # Suffixes are only handed on when something was entered.
                for suffix_name in ('lsuffix', 'rsuffix'):
                    suffix = self[suffix_name].value
                    if suffix:
                        kw[suffix_name] = suffix

                joined = self.editor.dataset.to_pandas().join(**kw)
                joined.reset_index(drop=True, inplace=True)
            except ValueError as e:
                flash(f"Error when joining: {e!s}", 'error')
            else:
                target = self.editor.dataset
                target.plot_data = other.plot_data # TODO: make this opt-in
                for key in other:
                    lkey = f"{key}{kw['rsuffix']}" if 'rsuffix' in kw else key
                    if lkey not in target:
                        target[lkey] = other[key]
                target.update_from_dataframe(joined)

                session['action'] = None
                session['action_data'] = None

                flash(f"{self['how'].value} join on {self['on'].value or 'index'} successful.")

        return redirect(self.editor.instance.url('edit'))


class EditorColumnEdit(poobrains.form.Fieldset):
    """Two-step fieldset: choose a column, then edit its name, title, color and description."""

    title = "Edit column information"
    cancel = poobrains.form.Button('submit', label='Cancel')

    def __init__(self, editor, **kwargs):
        """Build either the column chooser or the edit fields, depending on the session action."""

        super().__init__(**kwargs)
        self.editor = editor
        session = self.editor.session

        if session['action'] != 'edit.column':

            self['column'] = poobrains.form.fields.Select(label='Column', choices=self.editor.column_choices)

            # Pre-select the column a previous step remembered, if any.
            if session['action_data'] is not None and 'column_name' in session['action_data']:
                self['column'].value = session['action_data']['column_name']

            self.column_choose = poobrains.form.Button('submit', label='Choose')

        else:

            column_name = session['action_data']['column_name']
            column = self.editor.dataset[column_name]

            has_color = column['color'] is not None
            color_value = column['color'].hexrgb() if has_color else None

            self.title = f"Edit column information for '{column_name}'"

            self['column_name'] = poobrains.form.fields.Text(label='Name', value=column_name, help_text='machine readable ([a-z], [0-9], -)')
            self['column_title'] = poobrains.form.fields.Text(label='Title', value=column['title'])
            self['column_color_use'] = poobrains.form.fields.Checkbox(label="Use custom color", value=has_color)
            self['column_color'] = poobrains.form.fields.Color(label='Color', value=color_value)
            self['column_description'] = poobrains.form.fields.TextArea(label='Description', value=column['description'], help_text='Describe the axes, summarize the data and any notable trends in it.')

            self.apply = poobrains.form.Button('submit', label='Apply')

    def process(self, submit):
        """Handle cancel, column choice or applying the edits; always redirects to the edit page."""

        session = self.editor.session

        if submit == 'cancel':

            if session['action'] == 'edit.column':
                # Only step back to the column chooser.
                session['action'] = 'edit'
                flash("Going back.")

            else:
                session['action'] = None
                session['action_data'] = None
                flash("Cancelled action.")

        elif submit == 'column_choose':

            chosen = self['column'].value
            session['action'] = 'edit.column'
            session['action_data']['column_name'] = chosen
            flash(f"Now editing column {chosen}.")

        elif submit == 'apply':

            dataset = self.editor.dataset
            column_name = session['action_data']['column_name']
            authoritative_column_name = self['column_name'].value

            if column_name != authoritative_column_name: # means this column is getting renamed.
                dataset[authoritative_column_name] = dataset[column_name]
                del dataset[column_name]

                if dataset.plottable:
                    # rename any occurence of this renamed column in plot_data
                    for layer_info in dataset.plot_data['layers'].values():
                        for axis in ('x', 'y'):
                            if layer_info[axis] == column_name:
                                layer_info[axis] = authoritative_column_name

                flash(f"Renamed column from '{column_name}' to '{authoritative_column_name}'.")

            dataset[authoritative_column_name]['title'] = self['column_title'].value
            dataset[authoritative_column_name]['description'] = self['column_description'].value

            # A custom color is only stored while the checkbox is ticked.
            custom_color = None
            if self['column_color_use'].value:
                custom_color = poobrains.svg.Color.from_hexrgb(self['column_color'].value)
            dataset[authoritative_column_name]['color'] = custom_color

            session['action'] = None
            session['action_data'] = None
            flash(f"Updated information for column '{authoritative_column_name}'.")

        return redirect(self.editor.instance.url('edit'))


class EditorColumnDelete(poobrains.form.Fieldset):
    """Fieldset deleting one or more columns from the edited dataset."""

    title = "Delete column"
    cancel = poobrains.form.Button('submit', label='Cancel')

    def __init__(self, editor, **kwargs):
        """Offer all current columns of `editor` in a multi-select."""
        super().__init__(**kwargs)
        self.editor = editor
        self['columns'] = poobrains.form.fields.Select(label="Column", choices=self.editor.column_choices, multi=True)
        self.delete = poobrains.form.Button('submit', label='Delete')

    def process(self, submit):
        """Delete the selected columns (or cancel) and redirect to the edit page."""

        session = self.editor.session

        if submit == 'delete':

            doomed = list(self['columns'].value)
            for column_name in doomed:
                del self.editor.dataset[column_name]

            session['action'] = None
            session['action_data'] = None

            # Name the column when exactly one was removed, count otherwise.
            if len(doomed) == 1:
                flash(f"Deleted column '{doomed[0]}'.")
            else:
                flash(f"Deleted {len(doomed)} columns.")

        elif submit == 'cancel':
            session['action'] = None
            session['action_data'] = None
            flash("Cancelled action.")

        else:
            flash(f"{self.name} did nothing. This might be a bug.", 'warning')

        return redirect(self.editor.instance.url('edit'))


class EditorFilter(poobrains.form.Fieldset):
    """Fieldset collecting row filters in the editor session and applying them.

    Each stored filter is a dict with the keys ``column``, ``op``, ``value``
    and ``value_column``; it compares ``column`` against either the constant
    ``value`` or, row by row, against the column named by ``value_column``.
    """

    def __init__(self, editor, **kwargs):
        """List the filters stored so far and offer fields to add another one."""
        super().__init__(**kwargs)
        self.editor = editor

        if self.editor.session['action'] == 'filter': # TODO: is this even needed? will only get instantiated if action.startswith('filter')

            action_data = self.editor.session['action_data']

            if 'filters' not in action_data:
                action_data['filters'] = []

            else:
                for i, spec in enumerate(action_data['filters']):
                    # For display only the *name* of the other column is
                    # needed; no dataframe exists at this point.
                    if spec['value_column']:
                        filter_value = spec['value_column']
                    else:
                        filter_value = spec['value']

                    fieldset = poobrains.form.Fieldset(title=f"Filter #{i}")
                    fieldset.message = poobrains.form.fields.Message(value=f"{spec['column']} {spec['op']} {filter_value}")
                    fieldset.delete = poobrains.form.Button('submit', label='Delete')
                    self[f"filter-{i}"] = fieldset

        self.filter_params = poobrains.form.Fieldset(title="Add filter")
        self.filter_params['column'] = poobrains.form.fields.Select(label='Column', choices=self.editor.column_choices)
        self.filter_params['op'] = poobrains.form.fields.Select(choices=([(op, op) for op in ('<', '<=', '==', '>=', '>')]), label='Operation')
        self.filter_params['value'] = poobrains.form.fields.Text(type=poobrains.form.types.FLOAT)
        self.filter_params['value_column'] = poobrains.form.fields.Select(label='Other column', choices=self.editor.column_choices)

        self.cancel = poobrains.form.Button('submit', label='Cancel')
        self.filter_new = poobrains.form.Button('submit', label='Add another filter')
        self.apply = poobrains.form.Button('submit', label='Apply')

    def validate(self, submit):
        """Require complete filter parameters while no filter is stored yet.

        Raises:
            poobrains.errors.CompoundError: wrapping one ValidationError per
                missing parameter.
        """

        errors = []

        if submit != 'cancel' and len(self.editor.session['action_data']['filters']) == 0:

            if self.filter_params['column'].value is None:
                e = poobrains.errors.ValidationError('You must select a column.')
                self.filter_params['column'].errors.append(e)
                errors.append(e)

            if self.filter_params['op'].value is None:
                e = poobrains.errors.ValidationError('You must select an operation.')
                self.filter_params['op'].errors.append(e)
                errors.append(e)

            if self.filter_params['value'].value is None and self.filter_params['value_column'].value is None:
                # Either a constant or another column is needed; flag both fields.
                e = poobrains.errors.ValidationError('You must select a value to compare against.')
                self.filter_params['value'].errors.append(e)
                self.filter_params['value_column'].errors.append(e)
                errors.append(e)

        if len(errors) > 0:
            raise poobrains.errors.CompoundError(errors)

    def process(self, submit):
        """Store, delete or apply filters, then redirect to the edit page."""

        import operator  # local import: only needed here

        session = self.editor.session

        if submit == 'cancel':

            session['action'] = None
            session['action_data'] = None

            flash("Cancelled action.")
            return redirect(self.editor.instance.url('edit'))

        params = self['filter_params']
        # A filter compares against a constant *or* another column, so either
        # one counts as a usable operand (mirrors validate()).
        has_operand = not params['value'].empty or not params['value_column'].empty

        if not params['column'].empty and not params['op'].empty and has_operand:

            session['action_data']['filters'].append({
                'column': params['column'].value,
                'op': params['op'].value,
                'value': params['value'].value,
                'value_column': params['value_column'].value,
            })

        filters = session['action_data']['filters']

        if submit == 'apply':

            comparisons = {
                '<': operator.lt,
                '<=': operator.le,
                '==': operator.eq,
                '>=': operator.ge,
                '>': operator.gt,
            }

            df = self.editor.dataset.to_pandas()
            num_rows_old = len(df)

            for spec in filters:

                compare = comparisons.get(spec['op'])
                if compare is None:
                    continue  # unknown operation: leave the data untouched

                if spec['value_column']:
                    filter_value = df[spec['value_column']]
                else:
                    filter_value = spec['value']

                # Keep only the rows whose value in the chosen column
                # satisfies the comparison.
                df = df[compare(df[spec['column']], filter_value)]

            for column_name, series in df.items():
                if series.empty or series.isnull().all():
                    del self.editor.dataset[column_name]
                    flash(f"Column '{column_name}' retained 0 datapoints after filtering, deleted.")
                else:
                    self.editor.dataset.update_column_from_series(column_name, series)

            num_filters = len(filters)
            num_rows_new = len(df)

            session['action'] = None
            session['action_data'] = None

            flash(f"Applied {num_filters} filters. {num_rows_new} remaining after removing {num_rows_old - num_rows_new} rows.")

        else:
            # Delete buttons of listed filters submit as "filter-<n>.delete".
            match = re.match(r'^filter-(\d+)\.delete$', submit)
            if match:
                idx = int(match.group(1))
                if idx < len(filters):
                    filters.pop(idx)
                    flash(f"Removed filter #{idx}")
                else:
                    flash(f"Filter #{idx} does not exist.", 'error')

        return redirect(self.editor.instance.url('edit'))


class EditorActionControl(poobrains.form.Fieldset):
    """Action chooser that embeds the fieldset of the currently running action."""

    title = "Actions"

    def __init__(self, editor, **kwargs):
        """Build the action select plus either a "Choose" button or the action's fieldset."""

        super().__init__(**kwargs)
        self.editor = editor

        self.action = poobrains.form.fields.Select(choices=self.editor.action_choices, label='Action')
        if len(self.action.choices) == 1:
            # Nothing to choose from, pre-select the only option.
            self.action.value = self.action.choices[0][0]

        base_action = self.editor.session['action']
        if isinstance(base_action, str):
            # "join.finalize" and the like belong to the action "join".
            base_action = base_action.partition('.')[0]

        if base_action is not None and base_action in self.editor.actions:

            self['action'].value = base_action
            self['action'].readonly = True

            _, fieldset_class = self.editor.actions[base_action]
            self[base_action] = fieldset_class(editor)

        else:
            self.action_choose = poobrains.form.Button('submit', label='Choose')

    def validate(self, submit):
        """Delegate validation to the fieldset the submit came from."""
        self.validate_submitting_fieldset(submit)

    def process(self, submit):
        """Start the chosen action, or delegate to the submitting fieldset."""

        if submit != 'action_choose':
            return self.process_submitting_fieldset(submit)

        self.editor.session['action'] = self['action'].value
        self.editor.session['action_data'] = {}
        return redirect(self.editor.instance.url('edit'))


class DataEditor(poobrains.auth.BoundForm):
    """Form for creating datasets and editing them inside a locked editor session.

    While editing, the working copy of the data is kept serialized under
    ``session['editor-sessions'][<instance name>]`` and only written to the
    instance on "save".
    """

    # action name -> (label shown in the action select, fieldset class)
    actions = {
        'base_edit': ('Edit dataset info', EditorDatasetEdit),
        'join': ('Join dataset', EditorDatasetJoin),
        'add': ('Add column', EditorColumnAdd),
        'edit': ('Edit column info', EditorColumnEdit),
        'filter': ('Filter', EditorFilter),
        'delete': ('Delete column(s)', EditorColumnDelete)
    }

    def __init__(self, instance, mode='add'):
        """Set up the fields fitting `mode` and the lock state of `instance`."""

        super().__init__(instance, mode=mode)
        self.dataset = self.instance

        if mode == 'add':
            self.title = 'Add new dataset'
            self['new'] = EditorDatasetNew(self)
            return

        self.title = f"Edit dataset '{self.instance.name}'."

        if not self.has_lock:
            if self.locked:
                flash("This dataset is currently being edited in another session.", 'warning')
            else:
                self.start_session = poobrains.form.Button(type='submit', label='Start editing')
            return

        editor_sessions = session['editor-sessions']
        name = self.instance.name

        if name in editor_sessions:
            # continue with the working copy of a running editor session
            self.session = editor_sessions[name]
            self.dataset = poobrains.analysis.data.EphemeralDataset.deserialize(self.session['dataset'])

        else:
            # set up a fresh editor session holding current data
            self.session = {
                'action': None,
                'action_data': None,
                'dataset': self.dataset.serialize(whole=True),
            }

            editor_sessions[name] = self.session
            flash(f"Added editor session '{name}'.")

        #flash(self.session['action'], 'warning')
        self['action_control'] = EditorActionControl(self)

        if not self.empty:
            self['plot_action_control'] = EditorPlotActionControl(self)

        if self.session['action'] is None:
            self.reset = poobrains.form.Button(label='Reset changes', type='submit')
            self.save = poobrains.form.Button(type='submit', label='Save')

    @property
    def action_choices(self):
        """(name, label) pairs of offered actions; only 'add' when the session's dataset has length 0."""
        if hasattr(self, 'session') and len(self.session['dataset']) == 0:
            return [('add', self.actions['add'][0])]
        return [(name, entry[0]) for name, entry in self.actions.items()]

    @property
    def column_choices(self):
        """(name, "name: title") pairs for all columns; empty without an editor session."""
        if not hasattr(self, 'session'):
            return []

        return [
            (column_name, f"{column_name}: {column['title']}")
            for column_name, column in self.dataset.items()
        ]

    @property
    def layer_choices(self):
        """(name, name) pairs for every layer in the dataset's plot data."""
        return [(name, name) for name in self.dataset.plot_data['layers'].keys()]

    @property
    def locked(self):
        """True while an unexpired lock exists on a Dataset instance.

        An expired lock is deleted and the action control fields are removed
        from the form. Returns None when the instance is no Dataset or holds
        no lock at all.
        """
        if not isinstance(self.instance, poobrains.analysis.data.Dataset):
            return None

        try:
            lock = self.instance.lock
            if lock is None:
                return None

            if not lock.expired:
                return True

            lock.delete_instance() # delete expired session, clearing the lock via ON UPDATE SET NULL on Dataset.lock
            for field_name in ('plot_action_control', 'action_control'):
                if field_name in self:
                    del self[field_name]

            return False

        except poobrains.storage.SessionData.DoesNotExist:
            self.instance.lock = None
            self.instance.save()
            app.logger.warning(f"Cleared lock on {self.instance.__class__.__name__} {self.instance.name}, referenced nonexistant session. Sqlite used manually without 'PRAGMA foreign_key=True;'?")
            return False

    @property
    def has_lock(self):
        """Whether the lock of a Dataset instance equals the current session's data.

        Returns None when the instance is no Dataset.
        """
        if not isinstance(self.instance, poobrains.analysis.data.Dataset):
            return None

        try:
            return self.instance.lock == session.sessiondata
        except poobrains.storage.SessionData.DoesNotExist:
            self.instance.lock = None
            self.instance.save()
            app.logger.warning(f"Cleared lock on {self.instance.__class__.__name__} {self.instance.name}, referenced nonexistant session. Sqlite used manually without 'PRAGMA foreign_key=True;'?")
            return False

    #@locked_cached_property
    @property
    def empty(self):
        """True unless an editor session exists and its dataset holds at least one observation."""
        in_session = (
            isinstance(self.instance, poobrains.analysis.data.EphemeralDataset)
            and self.instance.name in session['editor-sessions']
        )
        if not in_session:
            return True

        return sum(len(column['observations']) for column in self.dataset.values()) == 0

    def summon_action_parameter_fieldset(self, source):
        """Return a fieldset with one text field per parameter of ``source.load``."""

        subform = poobrains.form.Fieldset()
        load_parameters = poobrains.helpers.function_parameters(source.load)

        for param_name, param_info in load_parameters.items():
            param_type = poobrains.form.types.lookup_table[param_info['type']]()
            subform[param_name] = poobrains.form.fields.Text(label=param_name, type=param_type, default=param_info['default'])

        return subform

    def validate(self, submit):
        """Validate the submitting sub-fieldset for dotted submit names only."""

        if '.' in submit:
            self.validate_submitting_fieldset(submit)

    def process(self, submit):
        """Handle session start, save and reset; everything else goes to the submitting fieldset."""

        editor_sessions = session['editor-sessions']
        name = self.instance.name

        if name not in editor_sessions:

            if submit == 'start_session':
                self.instance.lock = session.sessiondata
                self.instance.save()
                return redirect(self.instance.url('edit'))

            if not submit.startswith('new.'):
                flash("Editor session lost! Did you wait too long between edits?", 'error')
                return self

            # 'new.*' submits are passed down to process_submitting_fieldset

        elif submit == 'save':
            #self.instance.plot_kind = self.session['plot_kind']
            #self.instance.layers = self.session['layers']
            self.instance.plot_data = self.dataset.plot_data
            self.instance.data = self.dataset.data
            self.instance.lock = None
            self.instance.save()
            del editor_sessions[name]
            self.tags.process(submit) # also create TagBindings
            flash(f"Saved data to database and removed session '{name}'.")
            return redirect(self.instance.url('edit'))

        elif submit == 'reset':
            self.instance.lock = None
            self.instance.save()
            del editor_sessions[name]
            del self.session
            flash("Deleted editor session.")
            return redirect(self.instance.url('edit'))

        r = self.process_submitting_fieldset(submit)

        if self.has_lock:
            # persist whatever the sub-fieldset changed on the working copy
            self.session['dataset'] = self.dataset.serialize(whole=True)

        return self if r is None else r

+ 1
- 3
poobrains/analysis/geo.py View File

@@ -10,8 +10,6 @@ import poobrains.storage
import poobrains.auth
import poobrains.commenting

from . import base


def bound_longitude(value):

@@ -426,7 +424,7 @@ class EphemeralGeoData(poobrains.auth.Protected):
raise NotImplementedError("%s.fill not implemented" % type(self).__name__)


def render(self, mode='full', bbox=None):
def render(self, mode='teaser', bbox=None):

if not bbox is None:
self.override_bbox(bbox)


+ 0
- 1486
poobrains/analysis/plot.py
File diff suppressed because it is too large
View File


poobrains/analysis/base.py → poobrains/analysis/util.py View File


+ 362
- 0
poobrains/analysis/visualization.py View File

@@ -0,0 +1,362 @@
import math

import poobrains

from poobrains import app, locked_cached_property
from . import util


# Per-dtype callables turning a raw value into a human readable string.
# All numeric dtypes share util.pretty_si; 'geometry' has no prettifier (None).
__casts_builtin_pretty__ = dict.fromkeys(
    ('int32', 'int64', 'uint32', 'uint64', 'float32', 'float64'),
    util.pretty_si,
)
__casts_builtin_pretty__['datetime64'] = lambda x: f"{x.day}. {x.month}. {x.year} – {x.hour}:{x.minute}:{x.second}"
__casts_builtin_pretty__['geometry'] = None

# Per-dtype callables mapping a value to a float for visualization purposes.
# Numeric dtypes go through float(); datetimes become their timestamp;
# 'geometry' has no float cast (None). 'bool' is deliberately not listed.
__casts_builtin_float__ = dict.fromkeys(
    ('int32', 'int64', 'uint32', 'uint64', 'float32', 'float64'),
    float,
)
__casts_builtin_float__['datetime64'] = lambda moment: moment.timestamp()
__casts_builtin_float__['geometry'] = None

@app.expose('/svg/plot')
class Plot(poobrains.svg.SVG):


padding = None
width = None
height = None
plot_width = None
plot_height = None
description_height = None

dataset = None
length = None

class Meta:

    # Every offered mode maps to 'read'.
    modes = dict.fromkeys(('teaser', 'full', 'raw', 'inline'), 'read')

def __init__(self, handle=None, mode=None, dataset=None, layers=None, **kwargs):
    """Set up plot geometry, the dataset to draw and its layers.

    Args:
        handle: name of a stored dataset; used when `dataset` is None.
        mode: passed on to the base class.
        dataset: dataset object to plot directly, takes precedence over `handle`.
        layers: mapping of layer name to a dict naming the 'x' and 'y'
            columns. When None, a default layer is derived from the first
            column(s) of the dataset.

    Raises:
        ValueError: if the dataset has no columns.
    """

    super(Plot, self).__init__(handle=handle, mode=mode, **kwargs)

    if handle is None and dataset is None:
        # NOTE(review): `abort` is not among the names visibly imported at the
        # top of this module — confirm it is in scope.
        abort(404, "No dataset selected")

    self.padding = app.config['SVG_PLOT_PADDING']
    #self.plot_width = app.config['SVG_PLOT_WIDTH']
    self.plot_width = 100 - 2 * self.padding
    #self.plot_height = app.config['SVG_PLOT_HEIGHT']
    self.plot_height = self.plot_width
    self.description_height = app.config['SVG_PLOT_DESCRIPTION_HEIGHT']
    self.description_y = 100 - self.description_height
    self.width = self.plot_width + (2 * self.padding)
    self.height = self.plot_height + self.description_height + (3 * self.padding)

    if dataset is not None:
        self.dataset = dataset

    else:
        self.dataset = poobrains.analysis.data.load_dataset(handle)

    if len(self.dataset) == 0:
        raise ValueError("Empty Dataset can not be visualized")

    if layers is not None:
        self.layers = layers
    else:
        self.layers = {}
        # Use self.dataset here: the `dataset` argument is None when the
        # data was loaded by handle.
        column_names = iter(self.dataset.keys())
        if len(self.dataset) >= 2: # take first two columns as x and y axis, respectively
            x = next(column_names)
            y = next(column_names)
            self.layers[f'{x}-{y}'] = {
                'x': x,
                'y': y
            }

        else: # len(dataset) == 1; take only column as y with index as x
            # NOTE(review): this layer has no 'x' entry, while preprocess_data
            # reads layer_info['x'] — confirm how index-as-x is meant to work.
            y = next(column_names)
            self.layers[f'{y}'] = {
                'y': y,
            }

    self.preprocessed_data = None

@property
def length(self):
    """Number of observations in the column holding the most observations."""
    observation_counts = (len(column['observations']) for column in self.dataset.values())
    return max(observation_counts)
def preprocess_data(self):
    """Build per-layer point lists for rendering and store them in `self.preprocessed_data`.

    For every layer, each index entry present in both the layer's x and y
    column yields one point carrying the raw values, their pretty string
    form and their float form. Layer metadata (title, description, dtype,
    color) is taken from the y column.
    """

    index = self.dataset.complete_index()

    preprocessed_data = {}
    #for column_name, column in self.dataset.items():
    for layer_name, layer_info in self.layers.items():

        column_x = self.dataset[layer_info['x']]
        column_y = self.dataset[layer_info['y']]

        points = []
        preprocessed_data[layer_name] = {
            'id': self.layer_id(layer_name),
            'title': column_y['title'],
            'description': poobrains.md.MarkdownString(column_y['description']),
            'dtype': column_y['dtype'],
            'color': column_y['color'],
            'label_x': column_x['title'],
            'label_y': column_y['title'],
            'points': points,
        }

        # Look the per-dtype converters up once per layer, not once per point.
        pretty_x = __casts_builtin_pretty__[column_x['dtype']]
        pretty_y = __casts_builtin_pretty__[column_y['dtype']]
        float_x = __casts_builtin_float__[column_x['dtype']]
        float_y = __casts_builtin_float__[column_y['dtype']]

        observations_x = column_x['observations']
        observations_y = column_y['observations']

        for idx in index:

            # Only index entries observed in both columns form a point.
            if idx in observations_x and idx in observations_y:
                x = observations_x[idx]['value']
                y = observations_y[idx]['value']

                points.append({
                    'id': self.datapoint_id(layer_name, x),
                    'x_raw': x,
                    'y_raw': y,
                    'x_pretty': pretty_x(x),
                    'y_pretty': pretty_y(y),
                    'x_float': float_x(x),
                    'y_float': float_y(y),
                })

    self.preprocessed_data = preprocessed_data

@locked_cached_property
def palette(self):

named_palette = {}
palette = util.create_palette(len(self.dataset))