tmtoolkit
latest
Contents:
Installation
Getting started
Working with text corpora
Text preprocessing and basic text mining
Working with the Bag-of-Words representation
Topic modeling
Interoperability with R
API
Development
Version history
References
tmtoolkit
Index
Edit on GitHub
Index
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
J
|
K
|
L
|
M
|
N
|
P
|
R
|
S
|
T
|
U
|
V
|
W
_
__init__() (tmtoolkit.corpus.Corpus method)
(tmtoolkit.corpus.Document method)
(tmtoolkit.ngrammodels.NGramModel method)
(tmtoolkit.tokenseq.Counter method)
(tmtoolkit.topicmod.parallel.MultiprocEvaluationRunner method)
(tmtoolkit.topicmod.parallel.MultiprocEvaluationWorkerABC method)
(tmtoolkit.topicmod.parallel.MultiprocModelsRunner method)
(tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
A
applychain() (in module tmtoolkit.utils)
argsort() (in module tmtoolkit.utils)
as_chararray() (in module tmtoolkit.utils)
AVAILABLE_METRICS (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
B
bimaps (tmtoolkit.corpus.Corpus attribute)
builtin_corpora_info() (in module tmtoolkit.corpus)
C
chararray_elem_size() (in module tmtoolkit.utils)
check_context_size() (in module tmtoolkit.utils)
codoc_frequencies() (in module tmtoolkit.bow.bow_stats)
collapse_tokens() (in module tmtoolkit.tokenseq)
combine_sparse_matrices_columnwise() (in module tmtoolkit.utils)
compute_models_parallel() (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
convert_token_sequence() (tmtoolkit.ngrammodels.NGramModel method)
copy() (in module tmtoolkit.tokenseq)
(tmtoolkit.tokenseq.Counter method)
Corpus (class in tmtoolkit.corpus)
corpus_add_files() (in module tmtoolkit.corpus)
corpus_add_folder() (in module tmtoolkit.corpus)
corpus_add_tabular() (in module tmtoolkit.corpus)
corpus_add_zip() (in module tmtoolkit.corpus)
corpus_collocations() (in module tmtoolkit.corpus)
corpus_join_documents() (in module tmtoolkit.corpus)
corpus_ngramify() (in module tmtoolkit.corpus)
corpus_num_chars() (in module tmtoolkit.corpus)
corpus_num_tokens() (in module tmtoolkit.corpus)
corpus_retokenize() (in module tmtoolkit.corpus)
corpus_sample() (in module tmtoolkit.corpus)
corpus_split_by_paragraph() (in module tmtoolkit.corpus)
corpus_split_by_token() (in module tmtoolkit.corpus)
corpus_summary() (in module tmtoolkit.corpus)
corpus_tokens_flattened() (in module tmtoolkit.corpus)
corpus_unique_chars() (in module tmtoolkit.corpus)
Counter (class in tmtoolkit.tokenseq)
create_sparse_dtm() (in module tmtoolkit.bow.dtm)
custom_token_attrs_defaults (tmtoolkit.corpus.Corpus property)
D
DEFAULT_METRICS (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
DEFAULT_WORDCLOUD_KWARGS (in module tmtoolkit.topicmod.visualize)
deserialize_corpus() (in module tmtoolkit.corpus)
dict2df() (in module tmtoolkit.utils)
disable_logging() (in module tmtoolkit.utils)
doc_attrs (tmtoolkit.corpus.Corpus property)
doc_attrs_defaults (tmtoolkit.corpus.Corpus property)
doc_frequencies() (in module tmtoolkit.bow.bow_stats)
(in module tmtoolkit.corpus)
doc_labels (tmtoolkit.corpus.Corpus property)
doc_labels() (in module tmtoolkit.corpus)
doc_labels_sample() (in module tmtoolkit.corpus)
doc_lengths() (in module tmtoolkit.bow.bow_stats)
(in module tmtoolkit.corpus)
doc_num_sents() (in module tmtoolkit.corpus)
doc_sent_lengths() (in module tmtoolkit.corpus)
doc_texts() (in module tmtoolkit.corpus)
doc_token_lengths() (in module tmtoolkit.corpus)
doc_tokens() (in module tmtoolkit.corpus)
doc_vectors() (in module tmtoolkit.corpus)
Document (class in tmtoolkit.corpus)
document_from_attrs() (in module tmtoolkit.corpus)
document_token_attr() (in module tmtoolkit.corpus)
dtm() (in module tmtoolkit.corpus)
dtm_and_vocab_to_gensim_corpus_and_dict() (in module tmtoolkit.bow.dtm)
dtm_to_dataframe() (in module tmtoolkit.bow.dtm)
dtm_to_gensim_corpus() (in module tmtoolkit.bow.dtm)
E
elements() (tmtoolkit.tokenseq.Counter method)
empty_chararray() (in module tmtoolkit.tokenseq)
(in module tmtoolkit.utils)
enable_logging() (in module tmtoolkit.utils)
evaluate_topic_models() (in module tmtoolkit.topicmod.tm_gensim)
(in module tmtoolkit.topicmod.tm_lda)
(in module tmtoolkit.topicmod.tm_sklearn)
exclude_topics() (in module tmtoolkit.topicmod.model_stats)
F
filter_clean_tokens() (in module tmtoolkit.corpus)
filter_documents() (in module tmtoolkit.corpus)
filter_documents_by_docattr() (in module tmtoolkit.corpus)
filter_documents_by_label() (in module tmtoolkit.corpus)
filter_documents_by_length() (in module tmtoolkit.corpus)
filter_documents_by_mask() (in module tmtoolkit.corpus)
filter_for_pos() (in module tmtoolkit.corpus)
filter_tokens() (in module tmtoolkit.corpus)
filter_tokens_by_doc_frequency() (in module tmtoolkit.corpus)
filter_tokens_by_mask() (in module tmtoolkit.corpus)
filter_tokens_with_kwic() (in module tmtoolkit.corpus)
filter_topics() (in module tmtoolkit.topicmod.model_stats)
find_documents() (in module tmtoolkit.corpus)
fit() (tmtoolkit.ngrammodels.NGramModel method)
fit_model() (tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
flatten_list() (in module tmtoolkit.utils)
from_builtin_corpus() (tmtoolkit.corpus.Corpus class method)
from_files() (tmtoolkit.corpus.Corpus class method)
from_folder() (tmtoolkit.corpus.Corpus class method)
from_tabular() (tmtoolkit.corpus.Corpus class method)
from_zip() (tmtoolkit.corpus.Corpus class method)
fromkeys() (tmtoolkit.tokenseq.Counter class method)
G
generate_sequence() (tmtoolkit.ngrammodels.NGramModel method)
generate_topic_labels_from_top_words() (in module tmtoolkit.topicmod.model_stats)
generate_wordcloud_from_probabilities_and_words() (in module tmtoolkit.topicmod.visualize)
generate_wordcloud_from_weights() (in module tmtoolkit.topicmod.visualize)
generate_wordclouds_for_document_topics() (in module tmtoolkit.topicmod.visualize)
generate_wordclouds_for_topic_words() (in module tmtoolkit.topicmod.visualize)
generate_wordclouds_from_distribution() (in module tmtoolkit.topicmod.visualize)
gensim_corpus_to_dtm() (in module tmtoolkit.bow.dtm)
get() (tmtoolkit.corpus.Corpus method)
greedy_partitioning() (in module tmtoolkit.utils)
H
has_sents (tmtoolkit.corpus.Corpus property)
(tmtoolkit.corpus.Document property)
I
idf() (in module tmtoolkit.bow.bow_stats)
idf_probabilistic() (in module tmtoolkit.bow.bow_stats)
index_windows_around_matches() (in module tmtoolkit.tokenseq)
indices_of_matches() (in module tmtoolkit.tokenseq)
(in module tmtoolkit.utils)
items() (tmtoolkit.corpus.Corpus method)
J
join_collocations_by_patterns() (in module tmtoolkit.corpus)
join_collocations_by_statistic() (in module tmtoolkit.corpus)
K
keys() (tmtoolkit.corpus.Corpus method)
kwic() (in module tmtoolkit.corpus)
kwic_table() (in module tmtoolkit.corpus)
L
label (tmtoolkit.corpus.Document property)
language (tmtoolkit.corpus.Corpus property)
language_model (tmtoolkit.corpus.Corpus property)
ldamodel_full_doc_topics() (in module tmtoolkit.topicmod.model_io)
ldamodel_full_topic_words() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_doc_topics() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_topic_docs() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_topic_words() (in module tmtoolkit.topicmod.model_io)
ldamodel_top_word_topics() (in module tmtoolkit.topicmod.model_io)
least_distinct_words() (in module tmtoolkit.topicmod.model_stats)
least_probable_words() (in module tmtoolkit.topicmod.model_stats)
least_relevant_words_for_topic() (in module tmtoolkit.topicmod.model_stats)
least_salient_words() (in module tmtoolkit.topicmod.model_stats)
lemmatize() (in module tmtoolkit.corpus)
linebreaks_win2unix() (in module tmtoolkit.utils)
load_corpus_from_picklefile() (in module tmtoolkit.corpus)
load_corpus_from_tokens() (in module tmtoolkit.corpus)
load_corpus_from_tokens_table() (in module tmtoolkit.corpus)
load_ldamodel_from_pickle() (in module tmtoolkit.topicmod.model_io)
M
marginal_topic_distrib() (in module tmtoolkit.topicmod.model_stats)
marginal_word_distrib() (in module tmtoolkit.topicmod.model_stats)
mat2d_window_from_indices() (in module tmtoolkit.utils)
max_workers (tmtoolkit.corpus.Corpus property)
merge_dicts() (in module tmtoolkit.utils)
merge_sets() (in module tmtoolkit.utils)
metric_arun_2010() (in module tmtoolkit.topicmod.evaluate)
metric_cao_juan_2009() (in module tmtoolkit.topicmod.evaluate)
metric_coherence_gensim() (in module tmtoolkit.topicmod.evaluate)
metric_coherence_mimno_2011() (in module tmtoolkit.topicmod.evaluate)
metric_griffiths_2004() (in module tmtoolkit.topicmod.evaluate)
metric_held_out_documents_wallach09() (in module tmtoolkit.topicmod.evaluate)
module
tmtoolkit.bow.bow_stats
tmtoolkit.bow.dtm
tmtoolkit.corpus
tmtoolkit.corpus.visualize
tmtoolkit.ngrammodels
tmtoolkit.strings
tmtoolkit.tokenseq
tmtoolkit.topicmod
tmtoolkit.topicmod.evaluate
tmtoolkit.topicmod.model_io
tmtoolkit.topicmod.model_stats
tmtoolkit.topicmod.parallel
tmtoolkit.topicmod.tm_gensim
tmtoolkit.topicmod.tm_lda
tmtoolkit.topicmod.tm_sklearn
tmtoolkit.utils
most_common() (tmtoolkit.tokenseq.Counter method)
most_distinct_words() (in module tmtoolkit.topicmod.model_stats)
most_probable_words() (in module tmtoolkit.topicmod.model_stats)
most_relevant_words_for_topic() (in module tmtoolkit.topicmod.model_stats)
most_salient_words() (in module tmtoolkit.topicmod.model_stats)
MultiprocEvaluationRunner (class in tmtoolkit.topicmod.parallel)
MultiprocEvaluationWorkerABC (class in tmtoolkit.topicmod.parallel)
MultiprocModelsRunner (class in tmtoolkit.topicmod.parallel)
MultiprocModelsWorkerABC (class in tmtoolkit.topicmod.parallel)
N
n_docs (tmtoolkit.corpus.Corpus property)
NGramModel (class in tmtoolkit.ngrammodels)
ngrams (tmtoolkit.corpus.Corpus property)
ngrams() (in module tmtoolkit.corpus)
ngrams_join_str (tmtoolkit.corpus.Corpus property)
nlp (tmtoolkit.corpus.Corpus attribute)
normalize_unicode() (in module tmtoolkit.corpus)
npmi() (in module tmtoolkit.tokenseq)
numbers_to_magnitudes() (in module tmtoolkit.corpus)
numbertoken_to_magnitude() (in module tmtoolkit.strings)
P
pad_sequence() (in module tmtoolkit.tokenseq)
(tmtoolkit.ngrammodels.NGramModel method)
pairwise_max_table() (in module tmtoolkit.utils)
parameters_for_ldavis() (in module tmtoolkit.topicmod.visualize)
partial_sparse_log() (in module tmtoolkit.utils)
path_split() (in module tmtoolkit.utils)
perplexity() (tmtoolkit.ngrammodels.NGramModel method)
pickle_data() (in module tmtoolkit.utils)
plot_doc_frequencies_hist() (in module tmtoolkit.corpus.visualize)
plot_doc_lengths_hist() (in module tmtoolkit.corpus.visualize)
plot_doc_topic_heatmap() (in module tmtoolkit.topicmod.visualize)
plot_doc_topic_ranked_prob() (in module tmtoolkit.topicmod.visualize)
plot_eval_results() (in module tmtoolkit.topicmod.visualize)
plot_heatmap() (in module tmtoolkit.topicmod.visualize)
plot_num_sents_hist() (in module tmtoolkit.corpus.visualize)
plot_num_sents_vs_sent_length() (in module tmtoolkit.corpus.visualize)
plot_prob_distrib_ranked_prob() (in module tmtoolkit.topicmod.visualize)
plot_ranked_vocab_counts() (in module tmtoolkit.corpus.visualize)
plot_sent_lengths_hist() (in module tmtoolkit.corpus.visualize)
plot_token_lengths_hist() (in module tmtoolkit.corpus.visualize)
plot_topic_word_heatmap() (in module tmtoolkit.topicmod.visualize)
plot_topic_word_ranked_prob() (in module tmtoolkit.topicmod.visualize)
plot_vocab_counts_hist() (in module tmtoolkit.corpus.visualize)
pmi() (in module tmtoolkit.tokenseq)
pmi2() (in module tmtoolkit.tokenseq)
pmi3() (in module tmtoolkit.tokenseq)
ppmi() (in module tmtoolkit.tokenseq)
predict() (tmtoolkit.ngrammodels.NGramModel method)
print_ldamodel_distribution() (in module tmtoolkit.topicmod.model_io)
print_ldamodel_doc_topics() (in module tmtoolkit.topicmod.model_io)
print_ldamodel_topic_words() (in module tmtoolkit.topicmod.model_io)
print_summary() (in module tmtoolkit.corpus)
print_summary_default_max_documents (tmtoolkit.corpus.Corpus attribute)
print_summary_default_max_tokens_string_length (tmtoolkit.corpus.Corpus attribute)
prob() (tmtoolkit.ngrammodels.NGramModel method)
procexec (tmtoolkit.corpus.Corpus attribute)
punctuation (tmtoolkit.corpus.Corpus attribute)
R
read_dtm_from_rds() (in module tmtoolkit.bow.dtm)
read_text_file() (in module tmtoolkit.utils)
remove_chars() (in module tmtoolkit.corpus)
remove_common_tokens() (in module tmtoolkit.corpus)
remove_document_attr() (in module tmtoolkit.corpus)
remove_documents() (in module tmtoolkit.corpus)
remove_documents_by_docattr() (in module tmtoolkit.corpus)
remove_documents_by_label() (in module tmtoolkit.corpus)
remove_documents_by_length() (in module tmtoolkit.corpus)
remove_documents_by_mask() (in module tmtoolkit.corpus)
remove_punctuation() (in module tmtoolkit.corpus)
remove_token_attr() (in module tmtoolkit.corpus)
remove_tokens() (in module tmtoolkit.corpus)
remove_tokens_by_mask() (in module tmtoolkit.corpus)
remove_uncommon_tokens() (in module tmtoolkit.corpus)
results_by_parameter() (in module tmtoolkit.topicmod.evaluate)
run() (tmtoolkit.topicmod.parallel.MultiprocModelsRunner method)
(tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
S
sample_dict() (in module tmtoolkit.utils)
save_corpus_to_picklefile() (in module tmtoolkit.corpus)
save_dtm_to_rds() (in module tmtoolkit.bow.dtm)
save_ldamodel_summary_to_excel() (in module tmtoolkit.topicmod.model_io)
save_ldamodel_to_pickle() (in module tmtoolkit.topicmod.model_io)
send_results() (tmtoolkit.topicmod.parallel.MultiprocModelsWorkerABC method)
serialize_corpus() (in module tmtoolkit.corpus)
set_document_attr() (in module tmtoolkit.corpus)
set_logging_level() (in module tmtoolkit.utils)
set_token_attr() (in module tmtoolkit.corpus)
shutdown_workers() (tmtoolkit.topicmod.parallel.MultiprocModelsRunner method)
simplified_pos() (in module tmtoolkit.corpus)
simplify_unicode() (in module tmtoolkit.corpus)
simplify_unicode_chars() (in module tmtoolkit.strings)
sorted_df() (in module tmtoolkit.utils)
sorted_terms() (in module tmtoolkit.bow.bow_stats)
sorted_terms_table() (in module tmtoolkit.bow.bow_stats)
spacy_token_attrs (tmtoolkit.corpus.Corpus property)
spacydocs() (in module tmtoolkit.corpus)
split_func_args() (in module tmtoolkit.utils)
strip_tags() (in module tmtoolkit.strings)
subtract() (tmtoolkit.tokenseq.Counter method)
T
term_frequencies() (in module tmtoolkit.bow.bow_stats)
tf_binary() (in module tmtoolkit.bow.bow_stats)
tf_double_norm() (in module tmtoolkit.bow.bow_stats)
tf_log() (in module tmtoolkit.bow.bow_stats)
tf_proportions() (in module tmtoolkit.bow.bow_stats)
tfidf() (in module tmtoolkit.bow.bow_stats)
tmtoolkit.bow.bow_stats
module
tmtoolkit.bow.dtm
module
tmtoolkit.corpus
module
tmtoolkit.corpus.visualize
module
tmtoolkit.ngrammodels
module
tmtoolkit.strings
module
tmtoolkit.tokenseq
module
tmtoolkit.topicmod
module
tmtoolkit.topicmod.evaluate
module
tmtoolkit.topicmod.model_io
module
tmtoolkit.topicmod.model_stats
module
tmtoolkit.topicmod.parallel
module
tmtoolkit.topicmod.tm_gensim
module
tmtoolkit.topicmod.tm_lda
module
tmtoolkit.topicmod.tm_sklearn
module
tmtoolkit.utils
module
to_lowercase() (in module tmtoolkit.corpus)
to_uppercase() (in module tmtoolkit.corpus)
token_attrs (tmtoolkit.corpus.Corpus property)
(tmtoolkit.corpus.Document property)
token_collocation_matrix() (in module tmtoolkit.tokenseq)
token_collocations() (in module tmtoolkit.tokenseq)
token_cooccurrence() (in module tmtoolkit.corpus)
token_hash_convert() (in module tmtoolkit.tokenseq)
token_join_subsequent() (in module tmtoolkit.tokenseq)
token_lengths() (in module tmtoolkit.tokenseq)
token_match() (in module tmtoolkit.tokenseq)
token_match_multi_pattern() (in module tmtoolkit.tokenseq)
token_match_subsequent() (in module tmtoolkit.tokenseq)
token_ngrams() (in module tmtoolkit.tokenseq)
token_vectors() (in module tmtoolkit.corpus)
tokens_table() (in module tmtoolkit.corpus)
top_n_from_distribution() (in module tmtoolkit.topicmod.model_stats)
top_words_for_topics() (in module tmtoolkit.topicmod.model_stats)
topic_word_relevance() (in module tmtoolkit.topicmod.model_stats)
transform_tokens() (in module tmtoolkit.corpus)
U
unique_chars() (in module tmtoolkit.tokenseq)
unpickle_file() (in module tmtoolkit.utils)
update() (tmtoolkit.corpus.Corpus method)
(tmtoolkit.tokenseq.Counter method)
uses_unigrams (tmtoolkit.corpus.Corpus property)
V
values() (tmtoolkit.corpus.Corpus method)
vocabulary() (in module tmtoolkit.corpus)
vocabulary_counts() (in module tmtoolkit.corpus)
vocabulary_size() (in module tmtoolkit.corpus)
W
word_cooccurrence() (in module tmtoolkit.bow.bow_stats)
word_distinctiveness() (in module tmtoolkit.topicmod.model_stats)
word_saliency() (in module tmtoolkit.topicmod.model_stats)
workers_docs (tmtoolkit.corpus.Corpus property)
workers_timeout (tmtoolkit.corpus.Corpus attribute)
write_wordclouds_to_folder() (in module tmtoolkit.topicmod.visualize)
Read the Docs
v: latest
Versions
latest
v0.12.0
v0.11.2
v0.11.1
v0.11.0
v0.10.0.post1
Downloads
On Read the Docs
Project Home
Builds