Source code for lale.json_operator

import importlib
import jsonschema
import lale.helpers
import lale.operators
import lale.pretty_print
import logging

# Module-level logger; from_json uses it to warn when a trained operator's
# JSON representation carries no coefficients.
logger = logging.getLogger(__name__)

# JSON schema (draft-04) for the JSON representation of operators that
# to_json produces and from_json consumes.
SCHEMA = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'definitions': {
        # An operator is any one of the concrete shapes defined below.
        'operator': {
            'anyOf': [
                {'$ref': '#/definitions/planned_individual_op'},
                {'$ref': '#/definitions/trainable_individual_op'},
                {'$ref': '#/definitions/trained_individual_op'},
                {'$ref': '#/definitions/planned_pipeline'},
                {'$ref': '#/definitions/trainable_pipeline'},
                {'$ref': '#/definitions/trained_pipeline'},
                {'$ref': '#/definitions/operator_choice'},
            ],
        },
        # Fields common to individual operators, whatever their state.
        'individual_op': {
            'type': 'object',
            'required': ['class', 'state', 'operator'],
            'properties': {
                # Dotted identifier path; the module qualifier is optional
                # as far as this pattern is concerned.
                'class': {
                    'type': 'string',
                    'pattern': '^([A-Za-z_][A-Za-z_0-9]*[.])*[A-Za-z_][A-Za-z_0-9]*$',
                },
                'state': {
                    'enum': ['metamodel', 'planned', 'trainable', 'trained'],
                },
                'operator': {
                    'type': 'string',
                    'pattern': '^[A-Za-z_][A-Za-z_0-9]*$',
                },
                'documentation_url': {
                    'type': 'string',
                },
                # Either null or an object of hyperparameter values.
                'hyperparams': {
                    'anyOf': [
                        {'enum': [None]},
                        {
                            'type': 'object',
                            'patternProperties': {
                                '^[A-Za-z_][A-Za-z_0-9]*$': {},
                            },
                        },
                    ],
                },
                'coefs': {
                    'enum': [None, 'coefs_not_available'],
                },
            },
        },
        # State-specific refinements of individual_op.
        'planned_individual_op': {
            'allOf': [
                {'$ref': '#/definitions/individual_op'},
                {
                    'type': 'object',
                    'properties': {
                        'state': {'enum': ['planned']},
                    },
                },
            ],
        },
        'trainable_individual_op': {
            'allOf': [
                {'$ref': '#/definitions/individual_op'},
                {
                    'type': 'object',
                    'required': ['hyperparams'],
                    'properties': {
                        'state': {'enum': ['trainable']},
                    },
                },
            ],
        },
        'trained_individual_op': {
            'allOf': [
                {'$ref': '#/definitions/individual_op'},
                {
                    'type': 'object',
                    'required': ['hyperparams', 'coefs'],
                    'properties': {
                        'state': {'enum': ['trained']},
                    },
                },
            ],
        },
        # Fields common to pipelines: steps plus edges given as pairs of
        # integers (to_json emits indices into the steps array).
        'pipeline': {
            'type': 'object',
            'required': ['class', 'state', 'edges', 'steps'],
            'properties': {
                'class': {
                    'enum': [
                        'lale.operators.PlannedPipeline',
                        'lale.operators.TrainablePipeline',
                        'lale.operators.TrainedPipeline',
                    ],
                },
                'state': {
                    'enum': ['planned', 'trainable', 'trained'],
                },
                'edges': {
                    'type': 'array',
                    'items': {
                        'type': 'array',
                        'minItems': 2,
                        'maxItems': 2,
                        'items': {'type': 'integer'},
                    },
                },
                'steps': {
                    'type': 'array',
                    'items': {'$ref': '#/definitions/operator'},
                },
            },
        },
        # State-specific refinements of pipeline.
        'planned_pipeline': {
            'allOf': [
                {'$ref': '#/definitions/pipeline'},
                {
                    'type': 'object',
                    'properties': {
                        'state': {'enum': ['planned']},
                        'class': {'enum': ['lale.operators.PlannedPipeline']},
                    },
                },
            ],
        },
        'trainable_pipeline': {
            'allOf': [
                {'$ref': '#/definitions/pipeline'},
                {
                    'type': 'object',
                    'properties': {
                        'state': {'enum': ['trainable']},
                        'class': {'enum': ['lale.operators.TrainablePipeline']},
                        # Each step must itself be trainable or trained.
                        'steps': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'state': {'enum': ['trainable', 'trained']},
                                },
                            },
                        },
                    },
                },
            ],
        },
        'trained_pipeline': {
            'allOf': [
                {'$ref': '#/definitions/pipeline'},
                {
                    'type': 'object',
                    'properties': {
                        'state': {'enum': ['trained']},
                        'class': {'enum': ['lale.operators.TrainedPipeline']},
                        # Each step must itself be trained.
                        'steps': {
                            'type': 'array',
                            'items': {
                                'type': 'object',
                                'properties': {
                                    'state': {'enum': ['trained']},
                                },
                            },
                        },
                    },
                },
            ],
        },
        # A choice among alternative operators; its state is always planned.
        'operator_choice': {
            'type': 'object',
            'required': ['class', 'state', 'operator', 'steps'],
            'properties': {
                'class': {
                    'enum': ['lale.operators.OperatorChoice'],
                },
                'state': {
                    'enum': ['planned'],
                },
                'operator': {
                    'type': 'string',
                },
                'steps': {
                    'type': 'array',
                    'items': {'$ref': '#/definitions/operator'},
                },
            },
        },
    },
    '$ref': '#/definitions/operator',
}

# When this module is run as a script, hand SCHEMA to
# lale.helpers.validate_is_schema (presumably a self-check that SCHEMA is a
# well-formed JSON schema -- confirm against lale.helpers).
if __name__ == "__main__":
    lale.helpers.validate_is_schema(SCHEMA)

def _lifecycle_state(op):
    """Return the state label for `op`, or None if no state class matches.

    The checks run from most to least specific, so the first match wins.
    """
    if isinstance(op, lale.operators.Trained):
        return 'trained'
    if isinstance(op, lale.operators.Trainable):
        return 'trainable'
    if isinstance(op, lale.operators.Planned):
        return 'planned'
    if isinstance(op, lale.operators.MetaModel):
        return 'metamodel'
    return None


def to_json(op):
    """Encode an operator as a JSON-compatible dict conforming to SCHEMA.

    Parameters
    ----------
    op
        The operator to encode: an individual operator, a pipeline, or an
        operator choice from lale.operators.

    Returns
    -------
    dict
        Always contains 'class'; the remaining keys depend on the kind of
        operator ('state', 'operator', 'documentation_url', 'hyperparams',
        'coefs', 'edges', 'steps').

    Raises
    ------
    jsonschema.ValidationError
        If the assembled dict does not validate against SCHEMA.
    """
    encoded = {'class': op.class_name()}

    state = _lifecycle_state(op)
    if state is not None:
        encoded['state'] = state

    if isinstance(op, lale.operators.IndividualOp):
        encoded['operator'] = op.name()
        url = op.documentation_url()
        if url is not None:
            encoded['documentation_url'] = url
        if isinstance(op, lale.operators.TrainableIndividualOp):
            encoded['hyperparams'] = op.hyperparams()
            if isinstance(op, lale.operators.TrainedIndividualOp):
                # Learned coefficients are never serialized; an impl with a
                # `fit` attribute is marked as having unavailable coefs.
                if hasattr(op._impl, 'fit'):
                    encoded['coefs'] = 'coefs_not_available'
                else:
                    encoded['coefs'] = None
    elif isinstance(op, lale.operators.Pipeline):
        # Edges are stored as pairs of positions in the steps list.
        position = {step: index for index, step in enumerate(op.steps())}
        encoded['edges'] = [
            [position[source], position[target]]
            for source, target in op.edges()]
        encoded['steps'] = [step.to_json() for step in op.steps()]
    elif isinstance(op, lale.operators.OperatorChoice):
        encoded['operator'] = op.name()
        # An operator choice is always recorded as planned, overriding any
        # state derived above.
        encoded['state'] = 'planned'
        encoded['steps'] = [step.to_json() for step in op.steps()]

    jsonschema.validate(encoded, SCHEMA)
    return encoded
def _individual_op_from_json(encoded):
    """Rebuild an individual operator from an already-validated JSON dict."""
    name = encoded['operator']
    full_class_name = encoded['class']
    # SCHEMA accepts a class name without any '.', for which the module part
    # is empty. Reject it explicitly instead of importing a module named
    # after a truncated class name.
    module_name, _, class_name = full_class_name.rpartition('.')
    if not module_name:
        raise ImportError(
            f'cannot import operator class {full_class_name!r}: '
            'expected a fully qualified name like "package.module.Class"')
    # NOTE(security): this imports the named module and instantiates the
    # named class, so JSON from an untrusted source can trigger arbitrary
    # import-time and constructor code. Only pass trusted input.
    module = importlib.import_module(module_name)
    impl_class = getattr(module, class_name)
    impl = impl_class()
    schemas = None  # IndividualOp.__init__ should look up the schemas
    planned = lale.operators.PlannedIndividualOp(name, impl, schemas)

    state = encoded['state']
    if state == 'planned':
        return planned
    # Internal invariant: SCHEMA validation only lets planned, trainable, or
    # trained individual operators reach this point.
    assert state in ['trainable', 'trained'], state

    # Calling the planned operator with hyperparameters (or with none) yields
    # the trainable operator.
    hyperparams = encoded['hyperparams']
    if hyperparams is None:
        trainable = planned()
    else:
        trainable = planned(**hyperparams)

    if state == 'trained':
        if encoded['coefs'] == 'coefs_not_available':
            logger.warning(f'Since the JSON representation of trained operator {name} lacks coefficients, from_json returns a trainable operator instead.')
        else:
            assert encoded['coefs'] is None, encoded['coefs']
            return lale.operators.TrainedIndividualOp(
                name, trainable._impl, schemas)
    return trainable


def from_json(json):
    """Reconstruct an operator from its JSON representation.

    Parameters
    ----------
    json : dict
        A JSON-compatible dict conforming to SCHEMA, such as the result of
        to_json.

    Returns
    -------
    A pipeline if `json` has both 'steps' and 'edges', an OperatorChoice if
    it has only 'steps', and otherwise an individual operator in the state
    recorded in `json`. A trained individual operator whose 'coefs' is
    'coefs_not_available' is returned as a trainable operator, with a
    warning logged.

    Raises
    ------
    jsonschema.ValidationError
        If `json` does not validate against SCHEMA.
    ImportError
        If the 'class' of an individual operator has no module qualifier or
        names a module that cannot be imported.
    AttributeError
        If the module does not define the named class.
    """
    jsonschema.validate(json, SCHEMA)
    if 'steps' in json and 'edges' in json:
        # Pipeline: rebuild the steps first, then turn each pair of step
        # indices back into a pair of step objects.
        steps = [from_json(s) for s in json['steps']]
        edges = [(steps[e[0]], steps[e[1]]) for e in json['edges']]
        return lale.operators.get_pipeline_of_applicable_type(steps, edges)
    if 'steps' in json:
        # Steps without edges: an operator choice.
        steps = [from_json(s) for s in json['steps']]
        name = json['operator']
        return lale.operators.OperatorChoice(steps, name)
    return _individual_op_from_json(json)