tmtoolkit
v0.10.0.post1
Contents:
Installation
Getting started
Working with text corpora
Text preprocessing
Working with the Bag-of-Words representation
Topic modeling
API
Version history
tmtoolkit
»
Index
Edit on GitHub
Index
_ | A | B | C | D | E | F | G | I | J | K | L | M | N | P | R | S | T | U | V | W
_
__contains__() (tmtoolkit.corpus.Corpus method)
__copy__() (tmtoolkit.preprocess.TMPreproc method)
__deepcopy__() (tmtoolkit.corpus.Corpus method)
(tmtoolkit.preprocess.TMPreproc method)
__del__() (tmtoolkit.preprocess.TMPreproc method)
__delitem__() (tmtoolkit.corpus.Corpus method)
__getitem__() (tmtoolkit.corpus.Corpus method)
__init__() (tmtoolkit.corpus.Corpus method), [1]
(tmtoolkit.preprocess.TMPreproc method), [1]
(tmtoolkit.topicmod.parallel.MultiprocEvaluationRunner method)
(tmtoolkit.topicmod.parallel.MultiprocEvaluationWorkerABC method)
(tmtoolkit.topicmod.parallel.MultiprocModelsRunner method)
(tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
__setitem__() (tmtoolkit.corpus.Corpus method)
A
add_doc() (tmtoolkit.corpus.Corpus method)
add_files() (tmtoolkit.corpus.Corpus method)
add_folder() (tmtoolkit.corpus.Corpus method)
add_metadata_per_doc() (tmtoolkit.preprocess.TMPreproc method)
add_metadata_per_token() (tmtoolkit.preprocess.TMPreproc method)
add_special_chars() (tmtoolkit.preprocess.TMPreproc method)
add_stopwords() (tmtoolkit.preprocess.TMPreproc method)
add_tabular() (tmtoolkit.corpus.Corpus method)
add_zip() (tmtoolkit.corpus.Corpus method)
apply() (tmtoolkit.corpus.Corpus method)
apply_custom_filter() (tmtoolkit.preprocess.TMPreproc method)
argsort() (in module tmtoolkit.utils)
AVAILABLE_METRICS (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
B
builtin_corpora() (tmtoolkit.corpus.Corpus static method)
C
clean_tokens() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
codoc_frequencies() (in module tmtoolkit.bow.bow_stats)
combine_sparse_matrices_columnwise() (in module tmtoolkit.utils)
compact_documents() (in module tmtoolkit.preprocess)
compute_models_parallel() (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
copy() (tmtoolkit.corpus.Corpus method)
(tmtoolkit.preprocess.TMPreproc method)
Corpus (class in tmtoolkit.corpus)
create_sparse_dtm() (in module tmtoolkit.bow.dtm)
D
DEFAULT_METRICS (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
DEFAULT_WORDCLOUD_KWARGS (in module tmtoolkit.topicmod.visualize)
doc_frequencies() (in module tmtoolkit.bow.bow_stats)
(in module tmtoolkit.preprocess)
doc_labels (tmtoolkit.corpus.Corpus property)
(tmtoolkit.preprocess.TMPreproc property)
doc_labels() (in module tmtoolkit.preprocess)
doc_lengths (tmtoolkit.corpus.Corpus property)
(tmtoolkit.preprocess.TMPreproc property)
doc_lengths() (in module tmtoolkit.bow.bow_stats)
(in module tmtoolkit.preprocess)
doc_tokens() (in module tmtoolkit.preprocess)
doc_vectors (tmtoolkit.preprocess.TMPreproc property)
dtm (tmtoolkit.preprocess.TMPreproc property)
dtm_and_vocab_to_gensim_corpus_and_dict() (in module tmtoolkit.bow.dtm)
dtm_to_dataframe() (in module tmtoolkit.bow.dtm)
dtm_to_datatable() (in module tmtoolkit.bow.dtm)
dtm_to_gensim_corpus() (in module tmtoolkit.bow.dtm)
E
empty_chararray() (in module tmtoolkit.utils)
evaluate_topic_models() (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
exclude_topics() (in module tmtoolkit.topicmod.model_stats)
expand_compound_token() (in module tmtoolkit.preprocess)
expand_compound_tokens() (tmtoolkit.preprocess.TMPreproc method)
expand_compounds() (in module tmtoolkit.preprocess)
F
filter_by_max_length() (tmtoolkit.corpus.Corpus method)
filter_by_min_length() (tmtoolkit.corpus.Corpus method)
filter_characters() (tmtoolkit.corpus.Corpus method)
filter_documents() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
filter_documents_by_name() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
filter_for_pos() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
filter_tokens() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
filter_tokens_by_mask() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
filter_tokens_with_kwic() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
filter_topics() (in module tmtoolkit.topicmod.model_stats)
fit_model() (tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
flatten_list() (in module tmtoolkit.utils)
from_builtin_corpus() (tmtoolkit.corpus.Corpus class method)
from_files() (tmtoolkit.corpus.Corpus class method)
from_folder() (tmtoolkit.corpus.Corpus class method)
from_pickle() (tmtoolkit.corpus.Corpus class method)
from_state() (tmtoolkit.preprocess.TMPreproc class method)
from_tabular() (tmtoolkit.corpus.Corpus class method)
from_tokens() (tmtoolkit.preprocess.TMPreproc class method)
from_tokens_datatable() (tmtoolkit.preprocess.TMPreproc class method)
from_zip() (tmtoolkit.corpus.Corpus class method)
G
generate_ngrams() (tmtoolkit.preprocess.TMPreproc method)
generate_topic_labels_from_top_words() (in module tmtoolkit.topicmod.model_stats)
generate_wordcloud_from_probabilities_and_words() (in module tmtoolkit.topicmod.visualize)
generate_wordcloud_from_weights() (in module tmtoolkit.topicmod.visualize)
generate_wordclouds_for_document_topics() (in module tmtoolkit.topicmod.visualize)
generate_wordclouds_for_topic_words() (in module tmtoolkit.topicmod.visualize)
generate_wordclouds_from_distribution() (in module tmtoolkit.topicmod.visualize)
gensim_corpus_to_dtm() (in module tmtoolkit.bow.dtm)
get() (tmtoolkit.corpus.Corpus method)
get_available_metadata_keys() (tmtoolkit.preprocess.TMPreproc method)
get_doc_labels() (tmtoolkit.corpus.Corpus method)
get_dtm() (tmtoolkit.preprocess.TMPreproc method)
get_kwic() (tmtoolkit.preprocess.TMPreproc method)
get_kwic_table() (tmtoolkit.preprocess.TMPreproc method)
get_ngrams() (tmtoolkit.preprocess.TMPreproc method)
get_tokens() (tmtoolkit.preprocess.TMPreproc method)
get_vocabulary() (tmtoolkit.preprocess.TMPreproc method)
glue_tokens() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
greedy_partitioning() (in module tmtoolkit.utils)
I
idf() (in module tmtoolkit.bow.bow_stats)
idf_probabilistic() (in module tmtoolkit.bow.bow_stats)
ids2tokens() (in module tmtoolkit.preprocess)
init_for_language() (in module tmtoolkit.preprocess)
items() (tmtoolkit.corpus.Corpus method)
J
join_ngrams() (tmtoolkit.preprocess.TMPreproc method)
K
keys() (tmtoolkit.corpus.Corpus method)
kwic() (in module tmtoolkit.preprocess)
kwic_table() (in module tmtoolkit.preprocess)
L
ldamodel_full_doc_topics() (in module tmtoolkit.topicmod.model_io)
ldamodel_full_topic_words() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_doc_topics() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_topic_docs() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_topic_words() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_word_topics() (in module tmtoolkit.topicmod.model_io)
least_distinct_words() (in module tmtoolkit.topicmod.model_stats)
least_probable_words() (in module tmtoolkit.topicmod.model_stats)
least_relevant_words_for_topic() (in module tmtoolkit.topicmod.model_stats)
least_salient_words() (in module tmtoolkit.topicmod.model_stats)
lemmatize() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
linebreaks_win2unix() (in module tmtoolkit.corpus)
load_ldamodel_from_pickle() (in module tmtoolkit.topicmod.model_io)
load_state() (tmtoolkit.preprocess.TMPreproc method)
load_stopwords() (in module tmtoolkit.preprocess)
load_tokens() (tmtoolkit.preprocess.TMPreproc method)
load_tokens_datatable() (tmtoolkit.preprocess.TMPreproc method)
M
make_index_window_around_matches() (in module tmtoolkit.preprocess)
marginal_topic_distrib() (in module tmtoolkit.topicmod.model_stats)
marginal_word_distrib() (in module tmtoolkit.topicmod.model_stats)
mat2d_window_from_indices() (in module tmtoolkit.utils)
merge_dict_sequences_inplace() (in module tmtoolkit.utils)
metric_arun_2010() (in module tmtoolkit.topicmod.evaluate)
metric_cao_juan_2009() (in module tmtoolkit.topicmod.evaluate)
metric_coherence_gensim() (in module tmtoolkit.topicmod.evaluate)
metric_coherence_mimno_2011() (in module tmtoolkit.topicmod.evaluate)
metric_griffiths_2004() (in module tmtoolkit.topicmod.evaluate)
metric_held_out_documents_wallach09() (in module tmtoolkit.topicmod.evaluate)
module
tmtoolkit.bow.bow_stats
tmtoolkit.bow.dtm
tmtoolkit.corpus
tmtoolkit.preprocess
tmtoolkit.topicmod
tmtoolkit.topicmod.evaluate
tmtoolkit.topicmod.model_io
tmtoolkit.topicmod.model_stats
tmtoolkit.topicmod.parallel
tmtoolkit.topicmod.tm_gensim
tmtoolkit.topicmod.tm_lda
tmtoolkit.topicmod.tm_sklearn
tmtoolkit.utils
most_distinct_words() (in module tmtoolkit.topicmod.model_stats)
most_probable_words() (in module tmtoolkit.topicmod.model_stats)
most_relevant_words_for_topic() (in module tmtoolkit.topicmod.model_stats)
most_salient_words() (in module tmtoolkit.topicmod.model_stats)
MultiprocEvaluationRunner (class in tmtoolkit.topicmod.parallel)
MultiprocEvaluationWorkerABC (class in tmtoolkit.topicmod.parallel)
MultiprocModelsRunner (class in tmtoolkit.topicmod.parallel)
MultiprocModelsWorkerABC (class in tmtoolkit.topicmod.parallel)
N
n_docs (tmtoolkit.corpus.Corpus property)
(tmtoolkit.preprocess.TMPreproc property)
n_tokens (tmtoolkit.preprocess.TMPreproc property)
ngrams (tmtoolkit.preprocess.TMPreproc property)
ngrams() (in module tmtoolkit.preprocess)
ngrams_generated (tmtoolkit.preprocess.TMPreproc property)
normalize_to_unit_range() (in module tmtoolkit.utils)
P
paragraphs_from_lines() (in module tmtoolkit.corpus)
parameters_for_ldavis() (in module tmtoolkit.topicmod.visualize)
path_recursive_split() (in module tmtoolkit.corpus)
pickle_data() (in module tmtoolkit.utils)
plot_doc_topic_heatmap() (in module tmtoolkit.topicmod.visualize)
plot_eval_results() (in module tmtoolkit.topicmod.visualize)
plot_heatmap() (in module tmtoolkit.topicmod.visualize)
plot_topic_word_heatmap() (in module tmtoolkit.topicmod.visualize)
pos_tag() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
pos_tagged (tmtoolkit.preprocess.TMPreproc property)
pos_tags() (in module tmtoolkit.preprocess)
print_ldamodel_distribution() (in module tmtoolkit.topicmod.model_io)
print_ldamodel_doc_topics() (in module tmtoolkit.topicmod.model_io)
print_ldamodel_topic_words() (in module tmtoolkit.topicmod.model_io)
print_summary() (tmtoolkit.preprocess.TMPreproc method)
R
read_text_file() (in module tmtoolkit.corpus)
remove_characters() (tmtoolkit.corpus.Corpus method)
remove_chars() (in module tmtoolkit.preprocess)
remove_chars_in_tokens() (tmtoolkit.preprocess.TMPreproc method)
remove_common_tokens() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
remove_documents() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
remove_documents_by_name() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
remove_metadata() (tmtoolkit.preprocess.TMPreproc method)
remove_special_chars_in_tokens() (tmtoolkit.preprocess.TMPreproc method)
remove_tokens() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
remove_tokens_by_doc_frequency() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
remove_tokens_by_mask() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
remove_uncommon_tokens() (in module tmtoolkit.preprocess)
(tmtoolkit.preprocess.TMPreproc method)
replace_characters() (tmtoolkit.corpus.Corpus method)
require_attrs() (in module tmtoolkit.utils)
require_dictlike() (in module tmtoolkit.utils)
require_listlike() (in module tmtoolkit.utils)
require_listlike_or_set() (in module tmtoolkit.utils)
require_types() (in module tmtoolkit.utils)
results_by_parameter() (in module tmtoolkit.topicmod.evaluate)
run() (tmtoolkit.topicmod.parallel.MultiprocModelsRunner method)
(tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
S
sample() (tmtoolkit.corpus.Corpus method)
save_ldamodel_summary_to_excel() (in module tmtoolkit.topicmod.model_io)
save_ldamodel_to_pickle() (in module tmtoolkit.topicmod.model_io)
save_state() (tmtoolkit.preprocess.TMPreproc method)
send_results() (tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
shutdown_workers() (tmtoolkit.preprocess.TMPreproc method)
(tmtoolkit.topicmod.parallel.MultiprocModelsRunner method)
simplified_pos() (in module tmtoolkit.preprocess)
sorted_terms() (in module tmtoolkit.bow.bow_stats)
sorted_terms_datatable() (in module tmtoolkit.bow.bow_stats)
spacy_docs (tmtoolkit.preprocess.TMPreproc property)
spacydoc_from_tokens() (in module tmtoolkit.preprocess)
sparse_dtm() (in module tmtoolkit.preprocess)
split_by_paragraphs() (tmtoolkit.corpus.Corpus method)
str_multisplit() (in module tmtoolkit.preprocess)
str_shape() (in module tmtoolkit.preprocess)
str_shapesplit() (in module tmtoolkit.preprocess)
T
term_frequencies() (in module tmtoolkit.bow.bow_stats)
texts (tmtoolkit.preprocess.TMPreproc property)
tf_binary() (in module tmtoolkit.bow.bow_stats)
tf_double_norm() (in module tmtoolkit.bow.bow_stats)
tf_log() (in module tmtoolkit.bow.bow_stats)
tf_proportions() (in module tmtoolkit.bow.bow_stats)
tfidf() (in module tmtoolkit.bow.bow_stats)
TMPreproc (class in tmtoolkit.preprocess)
tmtoolkit.bow.bow_stats
module
tmtoolkit.bow.dtm
module
tmtoolkit.corpus
module
tmtoolkit.preprocess
module
tmtoolkit.topicmod
module
tmtoolkit.topicmod.evaluate
module
tmtoolkit.topicmod.model_io
module
tmtoolkit.topicmod.model_stats
module
tmtoolkit.topicmod.parallel
module
tmtoolkit.topicmod.tm_gensim
module
tmtoolkit.topicmod.tm_lda
module
tmtoolkit.topicmod.tm_sklearn
module
tmtoolkit.utils
module
to_lowercase() (in module tmtoolkit.preprocess)
to_pickle() (tmtoolkit.corpus.Corpus method)
token_glue_subsequent() (in module tmtoolkit.preprocess)
token_match() (in module tmtoolkit.preprocess)
token_match_subsequent() (in module tmtoolkit.preprocess)
token_vectors (tmtoolkit.preprocess.TMPreproc property)
tokendocs2spacydocs() (in module tmtoolkit.preprocess)
tokenize() (in module tmtoolkit.preprocess)
tokens (tmtoolkit.preprocess.TMPreproc property)
tokens2ids() (in module tmtoolkit.preprocess)
tokens_dataframe (tmtoolkit.preprocess.TMPreproc property)
tokens_datatable (tmtoolkit.preprocess.TMPreproc property)
tokens_to_lowercase() (tmtoolkit.preprocess.TMPreproc method)
tokens_with_metadata (tmtoolkit.preprocess.TMPreproc property)
tokens_with_pos_tags (tmtoolkit.preprocess.TMPreproc property)
top_n_from_distribution() (in module tmtoolkit.topicmod.model_stats)
top_words_for_topics() (in module tmtoolkit.topicmod.model_stats)
topic_word_relevance() (in module tmtoolkit.topicmod.model_stats)
transform() (in module tmtoolkit.preprocess)
transform_tokens() (tmtoolkit.preprocess.TMPreproc method)
U
unique_characters (tmtoolkit.corpus.Corpus property)
unpickle_file() (in module tmtoolkit.utils)
V
values() (tmtoolkit.corpus.Corpus method)
vocabulary (tmtoolkit.preprocess.TMPreproc property)
vocabulary() (in module tmtoolkit.preprocess)
vocabulary_abs_doc_frequency (tmtoolkit.preprocess.TMPreproc property)
vocabulary_counts (tmtoolkit.preprocess.TMPreproc property)
vocabulary_counts() (in module tmtoolkit.preprocess)
vocabulary_rel_doc_frequency (tmtoolkit.preprocess.TMPreproc property)
vocabulary_size (tmtoolkit.preprocess.TMPreproc property)
W
widen_chararray() (in module tmtoolkit.utils)
word_cooccurrence() (in module tmtoolkit.bow.bow_stats)
word_distinctiveness() (in module tmtoolkit.topicmod.model_stats)
word_saliency() (in module tmtoolkit.topicmod.model_stats)
write_wordclouds_to_folder() (in module tmtoolkit.topicmod.visualize)
Read the Docs
v: v0.10.0.post1
Versions
latest
v0.11.1
v0.11.0
v0.10.0.post1
Downloads
On Read the Docs
Project Home
Builds