# Source code for lale.lib.autogen.latent_dirichlet_allocation


from sklearn.decomposition.online_lda import LatentDirichletAllocation as SKLModel
import lale.helpers
import lale.operators
from numpy import nan, inf

class LatentDirichletAllocationImpl:
    """Wrapper that delegates to ``SKLModel`` (scikit-learn's LatentDirichletAllocation).

    The constructor only records the hyperparameters. The wrapped estimator
    is created inside :meth:`fit` and stored on ``self._sklearn_model``;
    :meth:`transform` forwards to that stored estimator.
    """

    def __init__(
        self,
        n_components=10,
        doc_topic_prior=None,
        topic_word_prior=None,
        learning_method='batch',
        learning_decay=0.7,
        learning_offset=10.0,
        max_iter=10,
        batch_size=128,
        evaluate_every=-1,
        total_samples=1000000.0,
        perp_tol=0.1,
        mean_change_tol=0.001,
        max_doc_update_iter=100,
        n_jobs=None,
        verbose=0,
        random_state=None,
        n_topics=None,
    ):
        """Record the hyperparameters that :meth:`fit` passes to ``SKLModel``.

        Every argument is stored unchanged under its own name in
        ``self._hyperparams``; no validation happens here. Per-parameter
        descriptions live in the module-level ``_hyperparams_schema``.
        """
        self._hyperparams = {
            'n_components': n_components,
            'doc_topic_prior': doc_topic_prior,
            'topic_word_prior': topic_word_prior,
            'learning_method': learning_method,
            'learning_decay': learning_decay,
            'learning_offset': learning_offset,
            'max_iter': max_iter,
            'batch_size': batch_size,
            'evaluate_every': evaluate_every,
            'total_samples': total_samples,
            'perp_tol': perp_tol,
            'mean_change_tol': mean_change_tol,
            'max_doc_update_iter': max_doc_update_iter,
            'n_jobs': n_jobs,
            'verbose': verbose,
            'random_state': random_state,
            'n_topics': n_topics,
        }

    def fit(self, X, y=None):
        """Create a new ``SKLModel`` from the stored hyperparameters and fit it.

        Args:
            X: Training data, forwarded as-is to ``SKLModel.fit``.
            y: Optional second argument; forwarded only when it is not None.

        Returns:
            This instance, so calls can be chained.
        """
        # A fresh estimator is built on every call, replacing any earlier one.
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is not None:
            self._sklearn_model.fit(X, y)
        else:
            self._sklearn_model.fit(X)
        return self

    def transform(self, X):
        """Return the result of ``transform(X)`` on the estimator built by :meth:`fit`.

        ``self._sklearn_model`` only exists after :meth:`fit` has been called.
        """
        return self._sklearn_model.transform(X)
# JSON-schema (draft-04) description of the constructor hyperparameters.
# The first `allOf` entry declares each hyperparameter; the remaining entries
# are cross-parameter constraints.
_hyperparams_schema = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'description': 'inherited docstring for LatentDirichletAllocation Latent Dirichlet Allocation with online variational Bayes algorithm',
    'allOf': [
        {
            'type': 'object',
            'required': [
                'n_components',
                'doc_topic_prior',
                'topic_word_prior',
                'learning_method',
                'learning_decay',
                'learning_offset',
                'max_iter',
                'batch_size',
                'evaluate_every',
                'total_samples',
                'perp_tol',
                'mean_change_tol',
                'max_doc_update_iter',
                'n_jobs',
                'verbose',
                'random_state',
                'n_topics',
            ],
            'relevantToOptimizer': [
                'n_components',
                'learning_method',
                'max_iter',
                'batch_size',
                'evaluate_every',
                'total_samples',
                'max_doc_update_iter',
            ],
            'additionalProperties': False,
            'properties': {
                'n_components': {
                    'type': 'integer',
                    'minimumForOptimizer': 2,
                    'maximumForOptimizer': 256,
                    'distribution': 'uniform',
                    'default': 10,
                    'description': 'Number of topics.',
                },
                'doc_topic_prior': {
                    'anyOf': [{'type': 'number'}, {'enum': [None]}],
                    'default': None,
                    'description': 'Prior of document topic distribution `theta`. If the value is None,',
                },
                'topic_word_prior': {
                    'anyOf': [{'type': 'number'}, {'enum': [None]}],
                    'default': None,
                    'description': 'Prior of topic word distribution `beta`. If the value is None, defaults',
                },
                'learning_method': {
                    'enum': ['batch', 'online'],
                    'default': 'batch',
                    'description': 'Method used to update `_component`. Only used in `fit` method.',
                },
                'learning_decay': {
                    'type': 'number',
                    'default': 0.7,
                    'description': 'It is a parameter that control learning rate in the online learning',
                },
                'learning_offset': {
                    'type': 'number',
                    'default': 10.0,
                    'description': 'A (positive) parameter that downweights early iterations in online',
                },
                'max_iter': {
                    'type': 'integer',
                    'minimumForOptimizer': 10,
                    'maximumForOptimizer': 1000,
                    'distribution': 'uniform',
                    'default': 10,
                    'description': 'The maximum number of iterations.',
                },
                'batch_size': {
                    'type': 'integer',
                    'minimumForOptimizer': 3,
                    'maximumForOptimizer': 128,
                    'distribution': 'uniform',
                    'default': 128,
                    'description': 'Number of documents to use in each EM iteration. Only used in online',
                },
                'evaluate_every': {
                    'type': 'integer',
                    'minimumForOptimizer': -1,
                    'maximumForOptimizer': 0,
                    'distribution': 'uniform',
                    'default': -1,
                    'description': 'How often to evaluate perplexity. Only used in `fit` method.',
                },
                'total_samples': {
                    'anyOf': [
                        {'type': 'integer', 'forOptimizer': False},
                        {
                            'type': 'number',
                            'minimumForOptimizer': 0.0,
                            'maximumForOptimizer': 1.0,
                            'distribution': 'uniform',
                        },
                    ],
                    'default': 1000000.0,
                    'description': 'Total number of documents. Only used in the `partial_fit` method.',
                },
                'perp_tol': {
                    'type': 'number',
                    'default': 0.1,
                    'description': 'Perplexity tolerance in batch learning. Only used when',
                },
                'mean_change_tol': {
                    'type': 'number',
                    'default': 0.001,
                    'description': 'Stopping tolerance for updating document topic distribution in E-step.',
                },
                'max_doc_update_iter': {
                    'type': 'integer',
                    'minimumForOptimizer': 100,
                    'maximumForOptimizer': 101,
                    'distribution': 'uniform',
                    'default': 100,
                    'description': 'Max number of iterations for updating document topic distribution in',
                },
                'n_jobs': {
                    'anyOf': [{'type': 'integer'}, {'enum': [None]}],
                    'default': None,
                    'description': 'The number of jobs to use in the E-step.',
                },
                'verbose': {
                    'type': 'integer',
                    'default': 0,
                    'description': 'Verbosity level.',
                },
                'random_state': {
                    'anyOf': [{'type': 'integer'}, {'type': 'object'}, {'enum': [None]}],
                    'default': None,
                    'description': 'If int, random_state is the seed used by the random number generator;',
                },
                'n_topics': {
                    'anyOf': [{'type': 'integer'}, {'enum': [None]}],
                    'default': None,
                    'description': 'This parameter has been renamed to n_components and will',
                },
            },
        },
        # NOTE(review): the second alternative of each constraint below keys on
        # 'method' or 'learning', neither of which is a declared hyperparameter
        # above. With no 'required' list those alternatives look always
        # satisfiable, so the constraints may be no-ops -- confirm intent
        # before tightening them.
        {
            'description': 'learning_method, only used in fit method',
            'anyOf': [
                {
                    'type': 'object',
                    'properties': {'learning_method': {'enum': ['batch']}},
                },
                {
                    'type': 'object',
                    'properties': {'method': {'enum': ['fit']}},
                },
            ],
        },
        {
            'description': 'batch_size, only used in online learning',
            'anyOf': [
                {
                    'type': 'object',
                    'properties': {'batch_size': {'enum': [128]}},
                },
                {
                    'type': 'object',
                    'properties': {'learning': {'enum': ['online']}},
                },
            ],
        },
        {
            'description': 'evaluate_every, only used in fit method',
            'anyOf': [
                {
                    'type': 'object',
                    'properties': {'evaluate_every': {'enum': [-1]}},
                },
                {
                    'type': 'object',
                    'properties': {'method': {'enum': ['fit']}},
                },
            ],
        },
        {
            'description': 'total_samples, only used in the partial_fit method',
            'anyOf': [
                {
                    'type': 'object',
                    'properties': {'total_samples': {'enum': [1000000.0]}},
                },
                {
                    'type': 'object',
                    'properties': {'method': {'enum': ['partial_fit']}},
                },
            ],
        },
        {
            'XXX TODO XXX': 'Parameter: perp_tol > only used when evaluate_every is greater than 0',
        },
    ],
}

# Schema for the arguments of `fit`: X plus an unconstrained `y`.
_input_fit_schema = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'description': 'Learn model for the data X with variational Bayes method.',
    'type': 'object',
    'properties': {
        'X': {
            'anyOf': [
                {
                    'type': 'array',
                    'items': {'XXX TODO XXX': 'item type'},
                    'XXX TODO XXX': 'array-like or sparse matrix, shape=(n_samples, n_features)',
                },
                {
                    'type': 'array',
                    'items': {'type': 'array', 'items': {'type': 'number'}},
                },
            ],
            'description': 'Document word matrix.',
        },
        'y': {},
    },
}

# Schema for the arguments of `transform`: X only.
_input_transform_schema = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'description': 'Transform data X according to the fitted model.',
    'type': 'object',
    'properties': {
        'X': {
            'anyOf': [
                {
                    'type': 'array',
                    'items': {'XXX TODO XXX': 'item type'},
                    'XXX TODO XXX': 'array-like or sparse matrix, shape=(n_samples, n_features)',
                },
                {
                    'type': 'array',
                    'items': {'type': 'array', 'items': {'type': 'number'}},
                },
            ],
            'description': 'Document word matrix.',
        },
    },
}

# Schema for the value returned by `transform`; it carries no type constraint
# yet, only a description and a TODO marker.
_output_transform_schema = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'description': 'Document topic distribution for X.',
    'XXX TODO XXX': 'shape=(n_samples, n_components)',
}

# Bundle of all schemas above, handed to `make_operator` together with the
# implementation class.
_combined_schemas = {
    '$schema': 'http://json-schema.org/draft-04/schema#',
    'description': 'Combined schema for expected data and hyperparameters.',
    'type': 'object',
    'tags': {'pre': [], 'op': ['transformer'], 'post': []},
    'properties': {
        'hyperparams': _hyperparams_schema,
        'input_fit': _input_fit_schema,
        'input_transform': _input_transform_schema,
        'output_transform': _output_transform_schema,
    },
}

# The combined schema is validated only when this module is run as a script.
if __name__ == '__main__':
    lale.helpers.validate_is_schema(_combined_schemas)

LatentDirichletAllocation = lale.operators.make_operator(
    LatentDirichletAllocationImpl, _combined_schemas
)