Source code for lale.lib.sklearn.logistic_regression

# Copyright 2019 IBM Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import lale.helpers
import lale.operators
import sklearn.linear_model

[docs]class LogisticRegressionImpl: def __init__(self, **hyperparams): self._hyperparams = hyperparams
[docs] def fit(self, X, y, **fit_params): self._sklearn_model = sklearn.linear_model.LogisticRegression( **self._hyperparams) if fit_params is None: self._sklearn_model.fit(X, y) else: self._sklearn_model.fit(X, y, **fit_params) return self
[docs] def predict(self, X): return self._sklearn_model.predict(X)
[docs] def predict_proba(self, X): return self._sklearn_model.predict_proba(X)
_input_schema_fit = { '$schema': 'http://json-schema.org/draft-04/schema#', 'description': 'Input data schema for training.', 'type': 'object', 'required': ['X', 'y'], 'additionalProperties': False, 'properties': { 'X': { 'description': 'Features; the outer array is over samples.', 'type': 'array', 'items': {'type': 'array', 'items': {'type': 'number'}}}, 'y': { 'description': 'Target class labels; the array is over samples.', 'type': 'array', 'items': {'type': 'number'}}}} _input_schema_predict = { '$schema': 'http://json-schema.org/draft-04/schema#', 'description': 'Input data schema for predictions.', 'type': 'object', 'required': ['X'], 'additionalProperties': False, 'properties': { 'X': { 'description': 'Features; the outer array is over samples.', 'type': 'array', 'items': {'type': 'array', 'items': {'type': 'number'}}}}} _output_schema = { '$schema': 'http://json-schema.org/draft-04/schema#', 'description': 'Output data schema for predictions (target class labels).', 'anyOf': [ { 'description': 'For predict, class label.', 'type': 'array', 'items': { 'type': 'number'}}, { 'description': 'For predict_proba, for each sample, vector of probabilities.', 'type': 'array', 'items': { 'type': 'array', 'items': { 'type': 'number' }}}]} _hyperparams_schema = { '$schema': 'http://json-schema.org/draft-04/schema#', 'description': 'Hyperparameter schema.', 'allOf': [ { 'description': 'This first sub-object lists all constructor arguments with their ' 'types, one at a time, omitting cross-argument constraints.', 'type': 'object', 'additionalProperties': False, 'required': [ 'penalty', 'dual', 'tol', 'C', 'fit_intercept', 'intercept_scaling', 'class_weight', 'random_state', 'solver', 'max_iter', 'multi_class', 'verbose', 'warm_start', 'n_jobs'], 'relevantToOptimizer': [ 'penalty', 'dual', 'tol', 'C', 'fit_intercept', 'class_weight', 'solver', 'multi_class'], 'properties': { 'solver': { 'description': 'Algorithm for optimization problem.', 'enum': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'], 'default': 'liblinear'}, 'penalty': { 'description': 'Norm used in the penalization.', 'enum': ['l1', 'l2'], 'default': 'l2'}, 'dual': { 'description': 'Dual or primal formulation.', 'type': 'boolean', 'default': False}, 'C': { 'description': 'Inverse regularization strength. Smaller values specify ' 'stronger regularization.', 'type': 'number', 'distribution': 'loguniform', 'minimum': 0.0, 'exclusiveMinimum': True, 'default': 1.0, 'minimumForOptimizer': 0.03125, 'maximumForOptimizer': 32768}, 'tol': { 'description': 'Tolerance for stopping criteria.', 'type': 'number', 'distribution': 'loguniform', 'minimum': 0.0, 'exclusiveMinimum': True, 'default': 0.0001, 'minimumForOptimizer': 1e-05, 'maximumForOptimizer': 0.1}, 'fit_intercept': { 'description': 'Specifies whether a constant (bias or intercept) should be ' 'added to the decision function.', 'type': 'boolean', 'default': True}, 'intercept_scaling': { 'description': 'Append a constant feature with constant value ' 'intercept_scaling to the instance vector.', 'type': 'number', 'distribution': 'loguniform', 'minimum': 0.0, 'exclusiveMinimum': True, 'default': 1.0}, 'class_weight': { 'anyOf': [ { 'description': 'By default, all classes have weight 1.', 'enum': [None]}, { 'description': 'Adjust weights by inverse frequency.', 'enum': ['balanced']}, { 'description': 'Dictionary mapping class labels to weights.', 'type': 'object', 'propertyNames': {'pattern': '^.+$', 'type': 'number'}, 'forOptimizer': False}], 'default': None}, 'random_state': { 'description': 'Seed of pseudo-random number generator for shuffling data.', 'anyOf': [ { 'description': 'RandomState used by np.random', 'enum': [None]}, { 'description': 'Explicit seed.', 'type': 'integer'}], 'default': None}, 'max_iter': { 'description': 'Maximum number of iterations for solvers to converge.', 'type': 'integer', 'distribution': 'loguniform', 'minimum': 1, 'default': 100}, 'multi_class': { 'description': 'Approach for more than two classes (not binary classifier).', 'enum': ['ovr', 'multinomial', 'auto'], 'default': 'ovr'}, 'verbose': { 'description': 'For the liblinear and lbfgs solvers set verbose to any positive ' 'number for verbosity.', 'type': 'integer', 'default': 0}, 'warm_start': { 'description': 'If true, initialize with solution of previous call to fit.', 'type': 'boolean', 'default': False}, 'n_jobs': { 'description': 'Number of CPU cores when parallelizing over classes if ' 'multi_class is ovr.', 'anyOf': [ { 'description': '1 unless in joblib.parallel_backend context.', 'enum': [None]}, { 'description': 'Use all processors.', 'enum': [-1]}, { 'description': 'Number of CPU cores.', 'type': 'integer', 'minimum': 1}], 'default': None}}}, { 'description': 'The newton-cg, sag, and lbfgs solvers support only l2 penalties.', 'anyOf': [ { 'type': 'object', 'properties': { 'solver': {'not': {'enum': ['newton-cg', 'sag', 'lbfgs']}}}}, { 'type': 'object', 'properties': {'penalty': {'enum': ['l2']}}}]}, { 'description': 'The dual formulation is only implemented for l2 ' 'penalty with the liblinear solver.', 'anyOf': [ { 'type': 'object', 'properties': {'dual': {'enum': [False]}}}, { 'type': 'object', 'properties': { 'penalty': {'enum': ['l2']}, 'solver': {'enum': ['liblinear']}}}]}, { 'description': 'Setting intercept_scaling is useful only when the solver is ' 'liblinear and fit_intercept is true.', 'anyOf': [ { 'type': 'object', 'properties': {'intercept_scaling': {'enum': [1.0]}}}, { 'type': 'object', 'properties': { 'fit_intercept': {'enum': [True]}, 'solver': {'enum': ['liblinear']}}}]}, { 'description': 'Setting max_iter is only useful for the newton-cg, sag, ' 'lbfgs solvers.', 'anyOf': [ { 'type': 'object', 'properties': {'max_iter': {'enum': [100]}}}, { 'type': 'object', 'properties': { 'solver': {'enum': ['newton-cg', 'sag', 'lbfgs']}}}]}, { 'description': 'The multi_class multinomial option is unavailable when the ' 'solver is liblinear.', 'anyOf': [ { 'type': 'object', 'properties': { 'multi_class': {'not': {'enum': ['multinomial']}}}}, { 'type': 'object', 'properties': { 'solver': {'not': {'enum': ['liblinear']}}}}]}]} _combined_schemas = { '$schema': 'http://json-schema.org/draft-04/schema#', 'description': 'Combined schema for expected data and hyperparameters.', 'documentation_url': 'http://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html', 'type': 'object', 'tags': { 'pre': ['~categoricals'], 'op': ['estimator', 'classifier', 'interpretable'], 'post': ['probabilities']}, 'properties': { 'input_fit': _input_schema_fit, 'input_predict': _input_schema_predict, 'output': _output_schema, 'hyperparams': _hyperparams_schema } } if __name__ == "__main__": lale.helpers.validate_is_schema(_combined_schemas) LogisticRegression = lale.operators.make_operator(LogisticRegressionImpl, _combined_schemas)