# Source code for jsonstat.dimension

# -*- coding: utf-8 -*-
# This file is part of https://github.com/26fe/jsonstat.py
# Copyright (C) 2016-2017 gf <gf@26fe.com>
# See LICENSE file

# stdlib
from __future__ import print_function
from __future__ import unicode_literals
from collections import namedtuple
import json

# packages
import terminaltables

# jsonstat
from jsonstat.exceptions import JsonStatException
from jsonstat.exceptions import JsonStatMalformedJson

# Immutable record describing one category (one possible value) of a dimension:
#   label -- text taken from the json "label" object (None when only "index" is present)
#   index -- key identifying the category in the json "index"/"label" objects
#   pos   -- integer position of the category inside the dimension
JsonStatCategory = namedtuple('JsonStatCategory', ['label', 'index', 'pos'])


class JsonStatDimension(object):
    """Represents a JsonStat Dimension. It is contained into a JsonStat Dataset.

    >>> from jsonstat import JsonStatDimension
    >>> json_string = '''{
    ...                    "label" : "concepts",
    ...                    "category" : {
    ...                       "index" : { "POP" : 0, "PERCENT" : 1 },
    ...                       "label" : { "POP" : "population",
    ...                                   "PERCENT" : "weight of age group in the population" }
    ...                    }
    ...                  }
    ... '''
    >>> dim = JsonStatDimension(did="concept", role="metric").from_string(json_string)
    >>> len(dim)
    2
    >>> dim.category(0).index
    'POP'
    >>> dim.category('POP').label
    'population'
    >>> dim.category(1)
    JsonStatCategory(label='weight of age group in the population', index='PERCENT', pos=1)
    >>> print(dim)
    +-----+-----------+-----------------------------------------+
    | pos | idx       | label                                   |
    +-----+-----------+-----------------------------------------+
    | 0   | 'POP'     | 'population'                            |
    | 1   | 'PERCENT' | 'weight of age group in the population' |
    +-----+-----------+-----------------------------------------+
    >>> json_string_dimension_sex = '''
    ... {
    ...     "label" : "sex",
    ...     "category" : {
    ...       "index" : {
    ...         "M" : 0,
    ...         "F" : 1,
    ...         "T" : 2
    ...       },
    ...       "label" : {
    ...         "M" : "men",
    ...         "F" : "women",
    ...         "T" : "total"
    ...       }
    ...     }
    ... }
    ... '''
    >>> dim = JsonStatDimension(did="sex").from_string(json_string_dimension_sex)
    >>> len(dim)
    3
    """

    def __init__(self, did=None, size=None, pos=None, role=None):
        """initialize a dimension

        .. warning::

            this is an internal library function (it is not public api)

        :param did: id of dimension
        :param size: size of dimension (nr of values)
        :param pos: position of dimension into the dataset
        :param role: of dimension
        """
        # it is valid if is correctly built (f.e. it was parsed correctly)
        self.__valid = False

        self.__did = did
        self.__size = size
        self.__role = role
        self.__pos = pos
        self.__label = None
        # raw content of the 'unit' key of the category, when present
        self.__unit = None

        # the three lookup tables stay None until a category has been parsed
        # __lbl2cat stays None when labels are not present in the json
        self.__pos2cat = None  # int -> cat
        self.__idx2cat = None  # idx -> cat
        self.__lbl2cat = None  # lbl -> cat

    #
    # queries
    #   dimension properties

    @property
    def did(self):
        """id of this dimension"""
        return self.__did

    @property
    def label(self):
        """label of this dimension"""
        return self.__label

    @property
    def role(self):
        """role of this dimension (can be time, geo or metric)"""
        return self.__role

    @property
    def pos(self):
        """position of this dimension with respect to the data set to which this dimension belongs"""
        return self.__pos

    def __len__(self):
        """size of this dimension

        :returns: the number of categories, or 0 when the size is not known yet
                  (``len()`` must never receive None)
        """
        return 0 if self.__size is None else self.__size

    @staticmethod
    def _escape_html(text):
        """escape the characters that are significant inside html text content"""
        return text.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")

    def __to_list(self):
        """build the rows (header first) used by the text and html representations

        :returns: list of rows, each row is a list of three strings
        """
        rows = [["pos", "idx", "label"]]
        for cat in self.__pos2cat or []:
            if cat is None:
                # a position left empty by a parsing that failed halfway
                continue
            idx = "" if cat.index is None else cat.index
            lbl = "" if cat.label is None else cat.label
            rows.append([str(cat.pos), "'{}'".format(idx), "'{}'".format(lbl)])
        return rows

    def __str__(self):
        """ascii table of the categories, empty string when nothing was parsed"""
        if self.__pos2cat is None:
            return ""
        return terminaltables.AsciiTable(self.__to_list()).table

    def __repr__(self):
        """used by ipython to make a better representation"""
        return self.__str__()

    def _repr_html_(self):
        """html table with at most the first 5 rows (header included)

        :returns: html string, empty string when nothing was parsed
        """
        if self.__pos2cat is None:
            return ""

        rows = self.__to_list()
        max_lines = 5
        parts = ["<table>"]
        for row in rows[:max_lines]:
            # cell text comes from the parsed json, so it is escaped before being embedded
            cells = "".join("<td>{}</td>".format(self._escape_html(cell)) for cell in row)
            parts.append("<tr>" + cells + "</tr>")
        if len(rows) > max_lines:
            # signal that the table was truncated
            parts.append("<tr>" + "<td>...</td>" * len(rows[0]) + "</tr>")
        parts.append("</table>")
        return "".join(parts)

    #
    # queries
    #   categories
    #

    def category(self, spec):
        """return JsonStatCategory according to spec

        :param spec: can be index (string) or label (string) or a position (integer)
        :returns: a JsonStatCategory
        :raises JsonStatException: if the dimension was not parsed or spec is unknown
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))

        if isinstance(spec, int):
            nr_cat = len(self.__pos2cat)
            # negative positions keep the list semantics (count from the end);
            # an out of range integer falls through to the index/label lookup
            if -nr_cat <= spec < nr_cat:
                return self.__pos2cat[spec]

        # try first indexes
        if spec in self.__idx2cat:
            return self.__idx2cat[spec]

        if self.__lbl2cat is not None and spec in self.__lbl2cat:
            return self.__lbl2cat[spec]

        raise JsonStatException("dimension '{}': unknown index or label '{}'".format(self.__did, spec))

    def _pos2cat(self, pos):
        """get the category associated with the position (integer)

        :param pos: integer
        :returns: the category or None if no category table exists
                  ex.: JsonStatCategory(index='2013', label='2013', pos=pos)
        :raises JsonStatException: if the dimension was not parsed
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))
        if self.__pos2cat is None:
            return None
        return self.__pos2cat[pos]

    def _idx2pos(self, idx):
        """from index to position

        :param idx: index for ex.: "2013"
        :returns: integer
        :raises JsonStatException: if the dimension was not parsed or idx is unknown
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))
        if idx not in self.__idx2cat:
            raise JsonStatException("dimension '{}': do not have index '{}'".format(self.__did, idx))
        return self.__idx2cat[idx].pos

    def _lbl2pos(self, lbl):
        """from label to position

        :param lbl: label for ex.: "2013"
        :returns: integer
        :raises JsonStatException: if the dimension was not parsed or lbl is unknown
        """
        if not self.__valid:
            raise JsonStatException("dimension '{}': is not initialized".format(self.__did))
        # the lookup must be done on the label table (it is None when the json has no labels)
        if self.__lbl2cat is None or lbl not in self.__lbl2cat:
            raise JsonStatException("dimension '{}': do not have label {}".format(self.__did, lbl))
        return self.__lbl2cat[lbl].pos

    #
    # parsing methods
    #

    def from_string(self, json_string):
        """parse a json string

        :param json_string:
        :returns: itself to chain calls
        """
        json_data = json.loads(json_string)
        self.from_json(json_data)
        return self

    def from_json(self, json_data):
        """Parse a json structure representing a dimension

        From `json-stat.org <https://json-stat.org/format/#dimensionid>`_

            It is used to describe a particular dimension.
            The name of this object must be one of the strings in the id array.
            There must be one and only one dimension ID object for every dimension in the id array.

        jsonschema for dimension is about::

            "dimension": {
                "type": "object",
                "properties": {
                    "version": {"$ref": "#/definitions/version"},
                    "href": {"$ref": "#/definitions/href"},
                    "class": {"type": "string", "enum": ["dimension"]},
                    "label": {"type": "string"},
                    "category": {"$ref": "#/definitions/category"},
                    "note": {"type": "array"},
                },
                "additionalProperties": false
            },

        :param json_data:
        :returns: itself to chain call
        :raises JsonStatMalformedJson: if 'class' is not 'dimension' or 'category' is missing
        """
        # children category, label, class

        if 'label' in json_data:
            self.__label = json_data['label']

        if 'class' in json_data:
            if json_data['class'] != 'dimension':
                msg = "class must be equals to 'dimension'"
                raise JsonStatMalformedJson(msg)

        # parsing category
        if "category" not in json_data:
            msg = "dimension '{}': missing category key".format(self.__did)
            raise JsonStatMalformedJson(msg)
        self.__parse_category(json_data['category'])

        self.__valid = True
        return self

    def __parse_category(self, json_data_category):
        """It is used to describe the possible values of a dimension.

        See https://json-stat.org/format/#category

        :param json_data_category:
        :returns:
        :raises JsonStatMalformedJson: if index and label are both missing or inconsistent

        jsonschema for dimension is about::

            "category": {
                "type": "object",
                "properties": {
                    "index": {"$ref": "#/definitions/category_index"},
                    "label": {"type": "object"},
                    "unit": {"$ref": "#/definitions/category_index"},
                    "child": {"type": "object", "properties": {"additionalProperties": {"type": "array"}}},
                    "coordinates": {"type": "object", "properties": {"additionalProperties": {"type": "array"}}},
                    "note": {"type": "array"}
                },
                "additionalProperties": false
            },
        """
        # validate: label or index must be present
        if 'index' not in json_data_category and 'label' not in json_data_category:
            msg = "dimension '{}': one of keys 'label' or 'index' must be presents"
            raise JsonStatMalformedJson(msg.format(self.__did))

        if 'index' in json_data_category:
            self.__parse_json_index(json_data_category)

        if 'label' in json_data_category:
            self.__parse_json_label(json_data_category)

        # validate: number of indexes and labels must the same??
        # Fewer labels than indexes is tolerated (see hierarchy.json),
        # more labels than indexes is an error.
        if self.__idx2cat is not None and self.__lbl2cat is not None:
            if len(self.__idx2cat) < len(self.__lbl2cat):
                msg = "dimension '{}': the number of labels ({}) are greater than number of indexes ({})"
                msg = msg.format(self.__did, len(self.__lbl2cat), len(self.__idx2cat))
                raise JsonStatMalformedJson(msg)

        # validate: indexes must be consistent with size
        if self.__size != len(self.__idx2cat):
            msg = "dimension '{}': malformed json: number of indexes {} not match with size {}"
            msg = msg.format(self.__did, len(self.__idx2cat), self.__size)
            raise JsonStatMalformedJson(msg)

        # validate: no hole in the indexes
        if any(v is None for v in self.__pos2cat):
            msg = "dimension '{}':hole in index".format(self.__did)
            raise JsonStatMalformedJson(msg)

        # "category_unit": {
        #     "type": "object",
        #     "properties": {
        #         "additionalProperties": {
        #             "type": "object",
        #             "properties": {"label": {"type": "string"},
        #                            "decimals": {"type": "number"},
        #                            "type": {"type": "string"},
        #                            "base": {"type": "string"},
        #                            "multiplier": {"type": "number"},
        #                            "position": {"type": "string"}},
        #             "additionalProperties": false
        #         }
        #     }
        # },

        # TODO: parse 'unit'
        # "unit" : {
        #     "exp" : {
        #         "decimals": 1,
        #         "label" : "millions",
        #         "symbol" : "$",
        #         "position" : "start"
        #     }
        # }

        # "category" : {
        #     "label" : {
        #         "UNR" : "unemployment rate"
        #     },
        #     "unit" : {
        #         "UNR" : {
        #             "label" : "%",
        #             "decimals" : 9,
        #             "type" : "ratio",
        #             "base" : "per cent",
        #             "multiplier" : 0
        #         }
        #     }
        # }
        if 'unit' in json_data_category:
            if self.__role != "metric":
                # report the anomaly without rejecting the json: the unit is still stored
                import warnings
                msg = "dimension {}: 'unit' can be used only when role is 'metric'"
                warnings.warn(msg.format(self.__did))
            self.__unit = json_data_category['unit']

    def __parse_json_index(self, json_data):
        """parse index json structure

        for ex. the json structure could be
        "category" : { "index" : { "2003" : 0, "2004" : 1, "2005" : 2, "2006" : 3 }

        The index can also be a list, in which case the position of each
        element is its position in the list.

        :param json_data: json structure
        """
        json_data_index = json_data['index']

        if self.__size is None:
            self.__size = len(json_data_index)

        self.__idx2cat = {}
        # preallocate a list of length self.size with default value None
        self.__pos2cat = self.__size * [None]

        if isinstance(json_data_index, list):
            for pos, idx in enumerate(json_data_index):
                self.__parse_json_index_helper(idx, pos)
        else:
            for idx, pos in json_data_index.items():
                self.__parse_json_index_helper(idx, pos)

    def __parse_json_index_helper(self, idx, pos):
        """register the category with index ``idx`` at position ``pos``

        :raises JsonStatException: if pos is outside ``[0, size)``
        """
        if pos < 0:
            # a negative position would silently address the list from the end
            msg = "dimension '{}': index '{}' has negative position {}"
            raise JsonStatException(msg.format(self.__did, idx, pos))
        if pos >= self.__size:
            msg = "dimension '{}': index {} is greater than size {}"
            msg = msg.format(self.__did, pos, self.__size)
            raise JsonStatException(msg)

        cat = JsonStatCategory(pos=pos, index=idx, label=None)
        self.__pos2cat[pos] = cat
        self.__idx2cat[idx] = cat

    def __parse_json_label(self, json_data):
        """parse label structure

        "category" : {"label" : { "CA" : "Canada" }}

        :param json_data: json structure to parse
        :raises JsonStatMalformedJson: if a label refers to an unknown index or
                                       there are more labels than the size
        """
        json_data_label = json_data['label']

        if self.__size is None:
            self.__size = len(json_data_label)

        no_index = 'index' not in json_data
        if no_index:
            self.__pos2cat = self.__size * [None]
            self.__idx2cat = {}

        self.__lbl2cat = {}
        for i, (idx, lbl) in enumerate(json_data_label.items()):
            if no_index:
                # if index are not defined in json, give an order to the label
                # (the iteration order of the 'label' object)
                pos = i
                if pos >= self.__size:
                    msg = "dimension '{}': malformed json: number of labels {} not match with size {}"
                    msg = msg.format(self.__did, len(json_data_label), self.__size)
                    raise JsonStatMalformedJson(msg)
            else:
                known = self.__idx2cat.get(idx)
                if known is None:
                    msg = "dimension '{}': label '{}' is associated with index '{}' that not exists!"
                    msg = msg.format(self.__did, lbl, idx)
                    raise JsonStatMalformedJson(msg)
                pos = known.pos

            # namedtuples are immutable: replace the category with a labelled one
            cat = JsonStatCategory(pos=pos, label=lbl, index=idx)
            self.__pos2cat[pos] = cat
            # if only labels are present into the json, deduce indexes from labels
            self.__idx2cat[idx] = cat
            self.__lbl2cat[lbl] = cat