Source code for jsonstat.dataset

# -*- coding: utf-8 -*-
# This file is part of https://github.com/26fe/jsonstat.py
# Copyright (C) 2016-2017 gf <gf@26fe.com>
# See LICENSE file

# stdlib
from __future__ import print_function
from __future__ import unicode_literals
from functools import reduce
import json

# packages
import numpy as np
import pandas as pd
import terminaltables

# jsonstat
from jsonstat.value import JsonStatValue
from jsonstat.dimension import JsonStatDimension
from jsonstat.exceptions import JsonStatException
from jsonstat.exceptions import JsonStatMalformedJson
from jsonstat.utility import lst2html


[docs]class JsonStatDataSet:
    """Represents a JsonStat dataset

        >>> import os, jsonstat  # doctest: +ELLIPSIS
        >>> filename = os.path.join(jsonstat._examples_dir, "www.json-stat.org", "oecd-canada-col.json")
        >>> dataset = jsonstat.from_file(filename).dataset(0)
        >>> dataset.label
        'Unemployment rate in the OECD countries 2003-2014'
        >>> print(dataset)
        name:   'Unemployment rate in the OECD countries 2003-2014'
        label:  'Unemployment rate in the OECD countries 2003-2014'
        size: 432
        +-----+---------+--------------------------------+------+--------+
        | pos | id      | label                          | size | role   |
        +-----+---------+--------------------------------+------+--------+
        | 0   | concept | indicator                      | 1    | metric |
        | 1   | area    | OECD countries, EU15 and total | 36   | geo    |
        | 2   | year    | 2003-2014                      | 12   | time   |
        +-----+---------+--------------------------------+------+--------+
        >>> dataset.dimension(1)
        +-----+--------+----------------------------+
        | pos | idx    | label                      |
        +-----+--------+----------------------------+
        | 0   | 'AU'   | 'Australia'                |
        | 1   | 'AT'   | 'Austria'                  |
        | 2   | 'BE'   | 'Belgium'                  |
        | 3   | 'CA'   | 'Canada'                   |
        | 4   | 'CL'   | 'Chile'                    |
        | 5   | 'CZ'   | 'Czech Republic'           |
        | 6   | 'DK'   | 'Denmark'                  |
        | 7   | 'EE'   | 'Estonia'                  |
        | 8   | 'FI'   | 'Finland'                  |
        | 9   | 'FR'   | 'France'                   |
        | 10  | 'DE'   | 'Germany'                  |
        | 11  | 'GR'   | 'Greece'                   |
        | 12  | 'HU'   | 'Hungary'                  |
        | 13  | 'IS'   | 'Iceland'                  |
        | 14  | 'IE'   | 'Ireland'                  |
        | 15  | 'IL'   | 'Israel'                   |
        | 16  | 'IT'   | 'Italy'                    |
        | 17  | 'JP'   | 'Japan'                    |
        | 18  | 'KR'   | 'Korea'                    |
        | 19  | 'LU'   | 'Luxembourg'               |
        | 20  | 'MX'   | 'Mexico'                   |
        | 21  | 'NL'   | 'Netherlands'              |
        | 22  | 'NZ'   | 'New Zealand'              |
        | 23  | 'NO'   | 'Norway'                   |
        | 24  | 'PL'   | 'Poland'                   |
        | 25  | 'PT'   | 'Portugal'                 |
        | 26  | 'SK'   | 'Slovak Republic'          |
        | 27  | 'SI'   | 'Slovenia'                 |
        | 28  | 'ES'   | 'Spain'                    |
        | 29  | 'SE'   | 'Sweden'                   |
        | 30  | 'CH'   | 'Switzerland'              |
        | 31  | 'TR'   | 'Turkey'                   |
        | 32  | 'UK'   | 'United Kingdom'           |
        | 33  | 'US'   | 'United States'            |
        | 34  | 'EU15' | 'Euro area (15 countries)' |
        | 35  | 'OECD' | 'total'                    |
        +-----+--------+----------------------------+
        >>> dataset.data(0)
        JsonStatValue(idx=0, value=5.943826289, status=None)
    """

[docs]    def __init__(self, name=None):
        """Initialize an empty dataset.

        Dataset could have a name (key) if we parse a jsonstat format version 1.

        :param name: dataset name (for jsonstat v.1)
        """
        self.__valid = False

        self.__name = name
        self.__title = None
        self.__label = None
        self.__source = None

        # dimensions
        self.__dim_nr = 0  # len(self.__pos2dim)

        self.__pos2size = []  # array int -> int (dimension size)
        self.__pos2mult = None  # array int -> multiplicative factor

        self.__pos2dim = []  # array int -> dim
        self.__did2dim = {}  # dict  id  -> dim
        self.__lbl2dim = {}  # dict  lbl -> dim

        self.__value = None
        self.__status = None

    @property
    def name(self):
        """
        :getter: returns the name of the dataset
        :type: string
        """
        return self.__name

    @property
    def label(self):
        """
        :getter: returns the label of the dataset
        :type: string
        """
        return self.__label

    def __str__(self):
        out = ""
        if self.__name is not None:
            out += "name:   '{}'\n".format(self.__name)

        if self.__title is not None:
            out += "title:  '{}'\n".format(self.__title)

        if self.__label is not None:
            out += "label:  '{}'\n".format(self.__label)

        if self.__source is not None:
            out += "source: '{}'\n".format(self.__label, self.__source)

        out += "size: {}".format(len(self))
        out += "\n"
        out += self.__str__dimensions()
        return out

    def __repr__(self):
        """used by ipython to make a better representation"""
        return self.__str__()

    def _repr_html_(self):
        """used by ipython to make a better representation"""

        html = ""
        if self.__name is not None:
            html += "name:   '{}'</br>".format(self.__name)

        if self.__title is not None:
            html += "title:  '{}'</br>".format(self.__title)

        if self.__label is not None:
            html += "label:  '{}'</br>".format(self.__label)

        if self.__source is not None:
            html += "source: '{}'</br>".format(self.__label, self.__source)

        html += "size: {}</br>".format(len(self))
        lst = self.__dim_to_table()
        html += lst2html(lst)
        return html

[docs]    def __len__(self):
        """returns the size of the dataset"""
        return len(self.__value)

    #
    # dimensions
    #

[docs]    def dimensions(self):
        """returns list of JsonStatDimension"""
        return self.__pos2dim

[docs]    def dimension(self, spec):
        """get a JsonStatDimension by spec

        :param spec: spec can be:
         - (string) or id of the dimension
         - int position of dimension
        :returns: a JsonStatDimension
        """
        if type(spec) is int:
            return self.__pos2dim[spec]
        if spec not in self.__did2dim:
            msg = "dataset '{}': unknown dimension '{}' know dimensions ids are: {}"
            msg = msg.format(self.__name, spec, ", ".join(
                [dim.did for dim in self.__pos2dim]))
            raise JsonStatException(msg)
        return self.__did2dim[spec]

    def __dim_to_table(self):
        lst = [["pos", "id", "label", "size", "role"]]
        for i, dim in enumerate(self.__pos2dim):
            row = [str(i), dim.did, dim.label, str(len(dim)), dim.role]
            row = list(map(lambda x: "" if x is None else x, row))
            lst.append(row)
        return lst

    def __str__dimensions(self):
        lst = self.__dim_to_table()

        table = terminaltables.AsciiTable(lst)
        # table.justify_columns = {2: "right", 4: "right"}
        out = table.table
        return out

[docs]    def info_dimensions(self):
        """print same info on dimensions on stdout"""
        print(self.__str__dimensions())

    #
    # querying value/status
    #

[docs]    def data(self, *args, **kargs):
        """Returns a JsonStatValue containings value and status about a datapoint
        The datapoint will be retrieved according the parameters

        :param args:
            - data(<int>)  where i is index into the
            - data(<list>) where lst = [i1,i2,i3,...]) each i indicate the dimension len(lst) == number of dimension
            - data(<dict>) where dict is {k1:v1, k2:v2, ...} dimension of size 1 can be ommitted

        :param kargs:
            - data(k1=v1,k2=v2,...) where **ki** are the id or label of dimension
              **vi** are the index or label of the category dimension of size 1 can be ommitted

        :returns: a JsonStatValue object

        kargs { cat1:value1, ..., cati:valuei, ... }
        cati can be the id of the dimension or the label of dimension
        valuei can be the index or label of category
        ex.:{country:"AU", "year":"2014"}

        >>> import os, jsonstat  # doctest: +ELLIPSIS
        >>> filename = os.path.join(jsonstat._examples_dir, "www.json-stat.org", "oecd-canada-col.json")
        >>> dataset = jsonstat.from_file(filename).dataset(0)
        >>> dataset.data(0)
        JsonStatValue(idx=0, value=5.943826289, status=None)
        >>> dataset.data(concept='UNR', area='AU', year='2003')
        JsonStatValue(idx=0, value=5.943826289, status=None)
        >>> dataset.data(area='AU', year='2003')
        JsonStatValue(idx=0, value=5.943826289, status=None)
        >>> dataset.data({'area':'AU', 'year':'2003'})
        JsonStatValue(idx=0, value=5.943826289, status=None)
        """
        if not self.__valid:
            raise JsonStatException('dataset not initialized')

        # decoding args
        idx = self._2idx(*args, **kargs)
        value = self.__value[idx]

        #
        # status
        #
        if self.__status is None:
            status = None
        elif isinstance(self.__status, str):
            status = self.__status
        elif isinstance(self.__status, list) and len(self.__status) == 1:
            status = self.__status[0]
        elif isinstance(self.__status, dict) and idx not in self.__status:
            status = None
        else:
            status = self.__status[idx]
        return JsonStatValue(idx, value, status)

[docs]    def value(self, *args, **kargs):
        """get a value
        For the parameters see py:meth:`jsonstat.JsonStatDataSet.data`.

        :returns: value (typically a number)
        """
        # TODO: add onlyvalue=true to extract only the value
        return self.data(*args, **kargs).value

[docs]    def status(self, *args, **kargs):
        """get datapoint status

        For the parameters see py:meth:`jsonstat.JsonStatDataSet.data`.

        :returns: status (typically a string)
        """
        # TODO: add onlystatus=true to extract only the value?
        return self.data(*args, **kargs).status

    def __value_from_vec_pos(self, lst):
        """

        :param lst: [0,3,4]
        :returns: value at dimension [0,3,4]
        """
        return self.__value[self.lint_as_idx(lst)]

    #
    # dataset can be access using different type of indexes
    # simple index is integer.
    # ex. dataset.data(0)
    # other type of idexes
    # lint [ <int1>, <int2>, <int3> ...]
    # lcat [ <cat1>, <cat2>, ... ]
    # dcat [ <did1>:<cat1>, <did2>:<cat1>, ... ]

    # this functions are only for library internal usage
    #

    def _2idx(self, *args, **kargs):
        """convert args to integer index """

        if len(args) == 1:
            # data(int)
            if isinstance(args[0], int):
                return args[0]
            # data([i1,i2,i3])
            elif isinstance(args[0], list):
                idx = self.lint_as_idx(args[0])
                return idx
            # data({k1:v1, k2:v2})
            elif isinstance(args[0], dict):
                dims = args[0]
                apos = self.dcat_to_lint(dims)
                idx = self.lint_as_idx(apos)
                return idx
        elif len(args) == 0:
            # data(k1:v1, k2:v2)
            dims = kargs
            # print(dims)
            apos = self.dcat_to_lint(dims)
            # print(apos)
            idx = self.lint_as_idx(apos)
            # print(idx)
            return idx

        msg = "unexpected parameters"
        raise JsonStatException(msg)

    def dcat_to_lint(self, dims):
        """Transforms a dimension dict to dimension array

        ::

            {"country":"AU", "year":2014} -> [1,2,3]

        :param dims: keys are dimension (id or label), value are categories
             "country" is the id of dimension
             "AU" is the category of dimension
        :returns: a list of integer
        """
        apos = len(self.__pos2dim) * [0]
        for (cat, val) in dims.items():
            # key is id
            if cat in self.__did2dim:
                dim = self.__did2dim[cat]
            # key is label
            elif cat in self.__lbl2dim:
                dim = self.__lbl2dim[cat]
            # key is not id or label so raise error
            else:
                allowed_categories = ", ".join(
                    ["'{}'".format(dim.did) for dim in self.__pos2dim])
                msg = "dataset '{}': category '{}' don't exists allowed categories are: {}"
                msg = msg.format(self.__name, cat, allowed_categories)
                raise JsonStatException(msg)

            apos[dim.pos] = dim.category(val).pos
        return apos

    def lint_as_idx(self, lst):
        """from a list of position get a index into value array

        [1,2,3] -> 10
        :param lst: list of integer
        :returns: an integer index into values
        """
        s = np.array(self.__pos2mult)
        r = s * lst
        return np.sum(r)

    def idx_as_lint(self, idx):
        """ 10 -> [<int1>, <int2>, ...]
        """
        lint = self.__dim_nr * [0]
        i = len(self.__pos2size) - 1
        while idx > 0 and i != 0:
            lint[i] = idx % self.__pos2size[i]
            idx -= (lint[i] * self.__pos2mult[i])
            i -= 1
        return lint

    def idx_as_lcat(self, idx):
        lint = self.idx_as_lint(idx)
        lcat = self.lint_as_lcat(lint)
        return lcat

    def lint_as_lcat(self, lint, without_one_dimension=False):
        """transforms an array of int into an array of cat

        [0,3,4] -> ['dimension 1 index', 'dimension 2 label', 'dimension 3 label']

        :param lint:  [0,3,4]
        :returns: ['dimension 1 index', 'dimension 2 label', 'dimension 3 label']
        """
        lcat = []
        for pos, lint_pos in enumerate(lint):
            dim = self.__pos2dim[pos]
            if not (without_one_dimension and len(dim) == 1):
                cat = dim._pos2cat(lint_pos).index
                lcat.append(cat)
        return lcat

    def _lint_to_llbl(self, apos, without_one_dimension=False):
        """transforms on array of dim into an array of label

        :param apos:  [0,3,4]
        :returns: ['dimension 1 label or index', 'dimension 2 label  or index', 'dimension 3 label  or index']
        """

        # vec_idx = len(vec_pos) * [None]
        aidx = []
        for pos in range(len(apos)):
            dim = self.__pos2dim[pos]
            lbl = dim._pos2cat(apos[pos]).label
            if lbl is None:
                lbl = dim._pos2cat(apos[pos]).index

            # vec_idx[i] = lbl
            if not (without_one_dimension and len(dim) == 1):
                aidx.append(lbl)

        return aidx

    def _from_aidx_to_adim(self, ldid):
        """From a list of dimension name to a list of numerical dimension position

          F.e.
          ["year", "country"] -> [1,0]
          ["country", "year"] -> [0,1]

        :returns: list of number
        """
        return [self.__did2dim[did].pos for did in ldid]

    #
    # generators
    #

    def all_pos(self, blocked_dims={}, order=None):
        """all_pos doc

        :param blocked_dims:  {"year":2013, country:"IT"}
        :param order: order
        :returns:
        """

        nr_dim = len(self.__pos2dim)
        if order is not None:
            if len(order) != nr_dim:
                msg = "length of the order vector is different from number of dimension {}".format(
                    nr_dim)
                raise JsonStatException(msg)
            if not isinstance(order[1], int):
                order = [self.__did2dim[iid].pos for iid in order]

        vec_pos_blocked = nr_dim * [False]
        vec_pos = nr_dim * [0]

        for (cat, idx) in blocked_dims.items():
            d = self.dimension(cat)
            vec_pos_blocked[d.pos] = True
            vec_pos[d.pos] = d._idx2pos(idx)

        pos2size = self.__pos2size

        if order is None:
            vec_dimension_reorder = range(nr_dim)
        else:
            vec_dimension_reorder = order

        nrd = nr_dim - 1
        while nrd >= 0:

            yield list(vec_pos)  # make a shallow copy of vec_pos

            nrd = nr_dim - 1
            cur_dim = vec_dimension_reorder[nrd]
            # se la posizione non e bloccata allora puoi far andare avanti la cifra
            if not vec_pos_blocked[cur_dim]:
                vec_pos[cur_dim] += 1

            # se non si arrivati all'ultima dimensione
            # e se la dimensione corrente non e al massimo valore o se la dimensione corrente e bloccata
            while nrd >= 0 and \
                    (vec_pos[cur_dim] == pos2size[cur_dim] or vec_pos_blocked[cur_dim]):

                # se la posizione non e' bloccata allora puoi far partire il valore dall'inizio
                if not vec_pos_blocked[cur_dim]:
                    vec_pos[cur_dim] = 0

                # esamina la prossima posizione
                nrd -= 1
                # se la dimensione corrente non e' la prima
                if nrd >= 0:
                    cur_dim = vec_dimension_reorder[nrd]
                    # se la dimensione corrente non e bloccata puoi farla avanzare
                    if not vec_pos_blocked[cur_dim]:
                        vec_pos[cur_dim] += 1

    def generate_all_vec(self, **blocked_dims):
        for vec_pos in self.all_pos(blocked_dims):
            vec_idx = self.lint_as_lcat(vec_pos)
            value = self.__value_from_vec_pos(vec_pos)

    #
    # transforming function
    #

[docs]    def to_table(self,
                 content="label",
                 order=None,
                 rtype=list,
                 blocked_dims={},
                 value_column="Value",
                 without_one_dimensions=False):
        """Transforms a dataset into a table (a list of row)

        table len is the size of dataset + 1 for headers

        :param content: can be "label" or "id"
        :param order:
        :param rtype:
        :param blocked_dims:
        :returns: a list of row, first line is the header
        """
        table = []

        # header
        if content == "label":
            header = [dim.label for dim in self.__pos2dim]
        else:
            header = [dim.did for dim in self.__pos2dim]

        header.append(value_column)

        # data
        table.append(header)
        for apos in self.all_pos(order=order, blocked_dims=blocked_dims):
            value = self.__value_from_vec_pos(apos)
            if content == "label":
                row = self._lint_to_llbl(
                    apos, without_one_dimension=without_one_dimensions)
            else:
                row = self.lint_as_lcat(
                    apos, without_one_dimension=without_one_dimensions)
            row.append(value)
            table.append(row)

        if rtype == pd.DataFrame:
            ret = pd.DataFrame(table[1:], columns=table[0])
        else:
            ret = table

        return ret

[docs]    def to_data_frame(self,
                      index=None,
                      content="label",
                      order=None,
                      blocked_dims={},
                      value_column="Value"):
        """Transform dataset to pandas data frame

        extract_bidimensional("year", "country")
        generate the following dataframe:
        year  |  country
        2010  |  1
        2011  |  2
        2012  |  3

        :param index:
        :param content:
        :param blocked_dims:
        :param order:
        :param value_column:

        :returns:
        """

        df = self.to_table(content=content,
                           order=order,
                           rtype=pd.DataFrame,
                           blocked_dims=blocked_dims,
                           value_column=value_column)
        # TODO: avoid creating a new dataframe (?)
        # df.index = df[index]
        # del df[index]
        if index:
            df = df.set_index([index])
        return df

    #
    # Parsing code
    #

[docs]    def from_file(self, filename):
        """read a jsonstat from a file and parse it to initialize this dataset.

        It is better to use :py:meth:`jsonstat.from_file`

        :param filename: path of the file.
        :returns: itself to chain calls
        """
        with open(filename) as f:
            json_string = f.read()
            self.from_string(json_string)
        return self

[docs]    def from_string(self, json_string):
        """parse a string containing a jsonstat and initialize this dataset

        It is better to use :py:meth:`jsonstat.from_string`

        :param json_string: string containing a jsonstat
        :returns: itself to chain calls
        """
        json_data = json.loads(json_string)
        self.from_json(json_data)
        return self

[docs]    def from_json(self, json_data):
        """parse a json structure and initialize this dataset

        It is better to use py:meth:`jsonstat.from_json`

        :param json_data: json structure
        :returns: itself to chain calls
        """
        if "version" in json_data:
            # assume version 2
            self._from_json_v2(json_data)
        else:
            self._from_json_v1(json_data)
        return self

    def _from_json_v1(self, json_data):
        """parse a json structure according to jsonstat format version 1.x

        .. warning::

            this is an internal library function (it is not public api)

        :param json_data: json structure
        """

        if 'label' in json_data:
            self.__label = json_data['label']
            if self.__name is None:
                self.__name = self.__label

        if 'source' in json_data:
            self.__source = json_data['source']

        if 'title' in json_data:
            self.__title = json_data['title']

        # parsing value
        if 'value' not in json_data:
            msg = "dataset '{}': missing 'value' key".format(self.__name)
            raise JsonStatMalformedJson(msg)
        self.__value = json_data['value']

        if len(self.__value) == 0:
            msg = "dataset '{}': field 'value' is empty".format(self.__name)
            raise JsonStatMalformedJson(msg)

        # https://json-stat.org/format/#status
        # parsing status
        #
        # eurostat has the following structure for status
        # status : {
        #   'value' : { "": "" }
        #   'category' : { ... }
        # }

        if 'status' in json_data:
            self.__status = json_data['status']
            if isinstance(self.__status, list):
                if len(self.__status) != 1 and len(self.__status) != len(
                        self.__value):
                    msg = "dataset '{}': incorrect size of status fields"
                    raise JsonStatMalformedJson(msg)
            if isinstance(self.__status, dict):
                # convert key into int
                # eurostat data has incorrect status { "":"" }
                nd = {}
                for k, v in self.__status.items():
                    try:
                        nd[int(k)] = v
                    except ValueError:
                        pass
                self.__status = nd

        #
        # parsing dimension
        #
        if 'dimension' not in json_data:
            msg = "dataset '{}': missing 'dimension' key".format(self.__name)
            raise JsonStatMalformedJson(msg)

        json_data_dimension = json_data['dimension']

        if 'id' not in json_data_dimension:
            msg = "dataset '{}': missing 'dimension.id' key".format(
                self.__name)
            raise JsonStatMalformedJson(msg)

        if 'size' not in json_data_dimension:
            msg = "dataset '{}': missing 'dimension.size' key".format(
                self.__name)
            raise JsonStatMalformedJson(msg)

        pos2iid = json_data_dimension['id']

        self.__pos2size = json_data_dimension['size']
        # https://github.com/26fe/jsonstat.py/issues/1
        # cso.ie expose dimension sizes as strings instead of integers.
        for i, e in enumerate(self.__pos2size):
            self.__pos2size[i] = int(e)

        self.__dim_nr = len(pos2iid)

        # validate dimension
        if len(pos2iid) != len(self.__pos2size):
            msg = "dataset '{}': dataset_id is different of dataset_size".format(
                self.__name)
            raise JsonStatMalformedJson(msg)

        json_data_roles = None
        if 'role' in json_data_dimension:
            json_data_roles = json_data_dimension['role']
        self.__parse_dimensions(json_data_dimension, json_data_roles, pos2iid)

        # validate
        size_total = reduce(lambda x, y: x * y, self.__pos2size)
        if len(self.__value) != size_total:
            msg = "dataset '{}': size {} is different from calculate size {} by dimension"
            msg = msg.format(self.__name, len(self.__value), size_total)
            raise JsonStatMalformedJson(msg)

        self.__compute_pos2mult()
        self.__valid = True

    def _from_json_v2(self, json_data):
        """parse a jsonstat structure compliant to jsonstat format version 2.x

        .. warning::

            this is an internal library function (it is not public api)

        :param json_data: json structure

        keys to be parsed
        - version
        - class: "dataset"

        - href: url
        - label: "..."
        - id: <list of dimension id>
        - size: <list of integer, size of dimension>
        - role: roles of dimension
        - value: <list of values>
        - status
        - dimension
        - link

        ::

            {
                "class" : "dataset",
                "href" : "http://json-stat.org/samples/oecd.json",
                "label" : "Unemployment rate in the OECD countries 2003-2014"
             }

        """

        if "href" in json_data:
            self.__href = json_data["href"]

        if 'label' in json_data:
            self.__label = json_data['label']
            if self.__name is None:
                self.__name = self.__label

        if "id" not in json_data:
            if "href" in json_data:
                # todo skip the next section???
                # todo: download data?
                return

        # value is required
        # https://json-stat.org/format/#value
        # TODO: value into a numpy array?
        self.__value = json_data['value']
        if len(self.__value) == 0:
            msg = "dataset '{}': field 'value' is empty".format(self.__name)
            raise JsonStatMalformedJson(msg)

        pos2iid = json_data['id']

        self.__pos2size = json_data['size']
        # https://github.com/26fe/jsonstat.py/issues/1
        # cso.ie expose dimension sizes as strings instead of integers.
        for i, e in enumerate(self.__pos2size):
            self.__pos2size[i] = int(e)

        self.__dim_nr = len(pos2iid)

        # validate len(ids) == len(sizes)
        if len(pos2iid) != len(self.__pos2size):
            msg = "dataset '{}': dataset_id is different of dataset_size".format(
                self.__name)
            raise JsonStatMalformedJson(msg)

        # https://json-stat.org/format/#status
        # parsing status
        if 'status' in json_data:
            self.__status = json_data['status']
            if isinstance(self.__status, list):
                if len(self.__status) != 1 and len(self.__status) != len(
                        self.__value):
                    msg = "dataset '{}': incorrect size of status fields"
                    raise JsonStatMalformedJson(msg)
            if isinstance(self.__status, dict):
                # convert key into int
                self.__status = {int(k): v for k, v in self.__status.items()}

        # dimension
        json_data_roles = None
        if 'role' in json_data:
            json_data_roles = json_data['role']
        json_data_dimension = json_data["dimension"]
        self.__parse_dimensions(json_data_dimension, json_data_roles, pos2iid)

        # TODO: parsing link

        self.__compute_pos2mult()
        self.__valid = True

    def __parse_dimensions(self, json_data_dimension, json_data_roles,
                           pos2iid):
        """Parse dimension in json stat

        it used for format v1 and v2

        :param json_data_dimension:
        :param json_data_roles:
        :returns:
        """

        # parsing roles
        roles = {}
        if json_data_roles is not None:
            json_roles = json_data_roles
            for r in json_roles.items():
                role = r[0]
                for dname in r[1]:
                    roles[dname] = role

        # parsing each dimensions
        self.__pos2dim = self.__dim_nr * [None]
        for dpos, dname in enumerate(pos2iid):
            dsize = self.__pos2size[dpos]

            if dname not in json_data_dimension:
                msg = "dataset '{}': malformed json: missing key {} in dimension".format(
                    self.__name, dname)
                raise JsonStatException(msg)

            dimension = JsonStatDimension(dname, dsize, dpos, roles.get(dname))
            dimension.from_json(json_data_dimension[dname])
            self.__did2dim[dname] = dimension
            self.__pos2dim[dpos] = dimension
            if dimension.label is not None:
                self.__lbl2dim[dimension.label] = dimension

    def __compute_pos2mult(self):
        acc = 1
        self.__pos2mult = self.__dim_nr * [1]
        i = self.__dim_nr - 2
        while i >= 0:
            acc = acc * self.__pos2size[i + 1]
            self.__pos2mult[i] = acc
            i -= 1