coax
latest
Examples
Getting Started
Agent Stubs
Linear Regression
Pong (Atari)
Cartpole
FrozenLake
Pendulum
DeepMind Control Suite
Function Approximators
Value Functions
Policies
Dynamics Models
Probability Distributions
Update Strategies
TD Learning
Policy Objectives
Model Updaters
Reward Tracing
Experience Replay
Value Losses
Value Transforms
Regularizers
Utilities
Wrappers
Workers
Utilities
Environments
Editorial
Index
Release Notes
coax
»
Index
Edit on GitHub
Index
_
|
A
|
B
|
C
|
D
|
E
|
F
|
G
|
H
|
I
|
J
|
K
|
L
|
M
|
N
|
O
|
P
|
Q
|
R
|
S
|
T
|
U
|
V
|
W
_
__call__() (coax.BoltzmannPolicy method)
(coax.EpsilonGreedy method)
(coax.Policy method)
(coax.Q method)
(coax.RandomPolicy method)
(coax.RewardFunction method)
(coax.StochasticQ method)
(coax.StochasticRewardFunction method)
(coax.StochasticTransitionModel method)
(coax.StochasticV method)
(coax.SuccessorStateQ method)
(coax.TransitionModel method)
(coax.utils.OrnsteinUhlenbeckNoise method)
(coax.utils.StepwiseLinearFunction method)
(coax.V method)
A
action_space (coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
add() (coax.experience_replay.PrioritizedReplayBuffer method)
(coax.experience_replay.SimpleReplayBuffer method)
(coax.reward_tracing.MonteCarlo method)
(coax.reward_tracing.NStep method)
affine_transform (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
apply_grads() (coax.model_updaters.ModelUpdater method)
(coax.policy_objectives.DeterministicPG method)
(coax.policy_objectives.PPOClip method)
(coax.policy_objectives.SoftPG method)
(coax.policy_objectives.VanillaPG method)
(coax.td_learning.ClippedDoubleQLearning method)
(coax.td_learning.DoubleQLearning method)
(coax.td_learning.ExpectedSarsa method)
(coax.td_learning.QLearning method)
(coax.td_learning.Sarsa method)
(coax.td_learning.SimpleTD method)
(coax.td_learning.SoftClippedDoubleQLearning method)
(coax.td_learning.SoftQLearning method)
argmax() (in module coax.utils)
argmin() (in module coax.utils)
B
batch_to_single() (in module coax.utils)
BoltzmannPolicy (class in coax)
BoxActionsToDiscrete (class in coax.wrappers)
BoxActionsToReals (class in coax.wrappers)
C
CategoricalDist (class in coax.proba_dists)
check_array() (in module coax.utils)
check_preprocessors() (in module coax.utils)
class_name() (coax.wrappers.BoxActionsToDiscrete class method)
(coax.wrappers.BoxActionsToReals class method)
(coax.wrappers.FrameStacking class method)
(coax.wrappers.MetaPolicyEnv class method)
(coax.wrappers.TrainMonitor class method)
clear() (coax.experience_replay.PrioritizedReplayBuffer method)
(coax.experience_replay.SimpleReplayBuffer method)
clipped_logit() (in module coax.utils)
ClippedDoubleQLearning (class in coax.td_learning)
close() (coax.envs.ConnectFourEnv method)
(coax.wrappers.BoxActionsToDiscrete method)
(coax.wrappers.BoxActionsToReals method)
(coax.wrappers.FrameStacking method)
(coax.wrappers.MetaPolicyEnv method)
(coax.wrappers.TrainMonitor method)
coax.envs
module
coax.experience_replay
module
coax.model_updaters
module
coax.policy_objectives
module
coax.proba_dists
module
coax.regularizers
module
coax.reward_tracing
module
coax.td_learning
module
coax.utils
module
coax.value_losses
module
coax.value_transforms
module
coax.wrappers
module
ConnectFourEnv (class in coax.envs)
copy() (coax.Policy method)
(coax.Q method)
(coax.reward_tracing.TransitionBatch method)
(coax.RewardFunction method)
(coax.StochasticQ method)
(coax.StochasticRewardFunction method)
(coax.StochasticTransitionModel method)
(coax.StochasticV method)
(coax.TransitionModel method)
(coax.V method)
cross_entropy (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
D
default_preprocessor() (in module coax.utils)
default_priors (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
DeterministicPG (class in coax.policy_objectives)
diff_transform() (in module coax.utils)
diff_transform_matrix() (in module coax.utils)
DiscretizedIntervalDist (class in coax.proba_dists)
dist_params() (coax.BoltzmannPolicy method)
(coax.EpsilonGreedy method)
(coax.Policy method)
(coax.RandomPolicy method)
(coax.StochasticQ method)
(coax.StochasticRewardFunction method)
(coax.StochasticTransitionModel method)
(coax.StochasticV method)
dist_params_structure (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
docstring() (in module coax.utils)
double_relu() (in module coax.utils)
DoubleQLearning (class in coax.td_learning)
dump() (in module coax.utils)
dumps() (in module coax.utils)
E
EmpiricalQuantileDist (class in coax.proba_dists)
enable_logging() (in module coax.utils)
entropy (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
EntropyRegularizer (class in coax.regularizers)
EpsilonGreedy (class in coax)
example_data() (coax.Policy class method)
(coax.Q class method)
(coax.RewardFunction class method)
(coax.StochasticQ class method)
(coax.StochasticRewardFunction class method)
(coax.StochasticTransitionModel class method)
(coax.StochasticV class method)
(coax.TransitionModel class method)
(coax.V class method)
ExpectedSarsa (class in coax.td_learning)
F
flush() (coax.reward_tracing.MonteCarlo method)
(coax.reward_tracing.NStep method)
FrameStacking (class in coax.wrappers)
from_single() (coax.reward_tracing.TransitionBatch class method)
function (coax.BoltzmannPolicy property)
(coax.EpsilonGreedy property)
(coax.Policy property)
(coax.Q property)
(coax.regularizers.EntropyRegularizer property)
(coax.regularizers.KLDivRegularizer property)
(coax.RewardFunction property)
(coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
(coax.StochasticV property)
(coax.TransitionModel property)
(coax.V property)
function_state (coax.BoltzmannPolicy property)
(coax.EpsilonGreedy property)
(coax.Policy property)
(coax.Q property)
(coax.RewardFunction property)
(coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
(coax.StochasticV property)
(coax.TransitionModel property)
(coax.V property)
function_type1 (coax.Q property)
(coax.RewardFunction property)
(coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
(coax.TransitionModel property)
function_type2 (coax.Q property)
(coax.RewardFunction property)
(coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
(coax.TransitionModel property)
G
generate_gif() (in module coax.utils)
get_counters() (coax.wrappers.TrainMonitor method)
get_env_attr() (in module coax.utils)
get_grads_diagnostics() (in module coax.utils)
get_magnitude_quantiles() (in module coax.utils)
get_metrics() (coax.wrappers.TrainMonitor method)
get_state() (coax.Worker method)
get_transition_batch() (in module coax.utils)
grads_and_metrics() (coax.model_updaters.ModelUpdater method)
(coax.policy_objectives.DeterministicPG method)
(coax.policy_objectives.PPOClip method)
(coax.policy_objectives.SoftPG method)
(coax.policy_objectives.VanillaPG method)
(coax.td_learning.ClippedDoubleQLearning method)
(coax.td_learning.DoubleQLearning method)
(coax.td_learning.ExpectedSarsa method)
(coax.td_learning.QLearning method)
(coax.td_learning.Sarsa method)
(coax.td_learning.SimpleTD method)
(coax.td_learning.SoftClippedDoubleQLearning method)
(coax.td_learning.SoftQLearning method)
H
has_env_attr() (in module coax.utils)
height (coax.utils.MaxTree property)
(coax.utils.MinTree property)
(coax.utils.SegmentTree property)
(coax.utils.SumTree property)
huber() (in module coax.value_losses)
hyperparams (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
I
idx() (in module coax.utils)
inverse_cdf() (coax.utils.SumTree method)
inverse_func (coax.value_transforms.LogTransform property)
(coax.value_transforms.ValueTransform property)
is_policy() (in module coax.utils)
is_qfunction() (in module coax.utils)
is_reward_function() (in module coax.utils)
is_stochastic() (in module coax.utils)
is_transition_model() (in module coax.utils)
is_vfunction() (in module coax.utils)
isscalar() (in module coax.utils)
J
jit() (in module coax.utils)
K
kl_divergence (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
KLDivRegularizer (class in coax.regularizers)
L
learn() (coax.Worker method)
load() (in module coax.utils)
load_counters() (coax.wrappers.TrainMonitor method)
loads() (in module coax.utils)
log_proba (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
logloss() (in module coax.value_losses)
logloss_sign() (in module coax.value_losses)
LogTransform (class in coax.value_transforms)
M
make_dmc() (in module coax.utils)
MaxTree (class in coax.utils)
mean (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
mean() (coax.BoltzmannPolicy method)
(coax.EpsilonGreedy method)
(coax.Policy method)
(coax.StochasticQ method)
(coax.StochasticRewardFunction method)
(coax.StochasticTransitionModel method)
(coax.StochasticV method)
mean_func (coax.BoltzmannPolicy property)
(coax.EpsilonGreedy property)
(coax.Policy property)
(coax.StochasticV property)
mean_func_type1 (coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
mean_func_type2 (coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
merge_dicts() (in module coax.utils)
metadata (coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
MetaPolicyEnv (class in coax.wrappers)
metrics_func (coax.regularizers.EntropyRegularizer property)
(coax.regularizers.KLDivRegularizer property)
MinTree (class in coax.utils)
mode (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
mode() (coax.BoltzmannPolicy method)
(coax.EpsilonGreedy method)
(coax.Policy method)
(coax.RandomPolicy method)
(coax.StochasticQ method)
(coax.StochasticRewardFunction method)
(coax.StochasticTransitionModel method)
(coax.StochasticV method)
mode_func (coax.BoltzmannPolicy property)
(coax.EpsilonGreedy property)
(coax.Policy property)
(coax.StochasticV property)
mode_func_type1 (coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
mode_func_type2 (coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
modeltype (coax.Q property)
(coax.RewardFunction property)
(coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
(coax.TransitionModel property)
ModelUpdater (class in coax.model_updaters)
module
coax.envs
coax.experience_replay
coax.model_updaters
coax.policy_objectives
coax.proba_dists
coax.regularizers
coax.reward_tracing
coax.td_learning
coax.utils
coax.value_losses
coax.value_transforms
coax.wrappers
MonteCarlo (class in coax.reward_tracing)
mse() (in module coax.value_losses)
N
NormalDist (class in coax.proba_dists)
np_random (coax.envs.ConnectFourEnv property)
(coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
NStep (class in coax.reward_tracing)
O
observation_space (coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
OrnsteinUhlenbeckNoise (class in coax.utils)
P
params (coax.BoltzmannPolicy property)
(coax.EpsilonGreedy property)
(coax.Policy property)
(coax.Q property)
(coax.RewardFunction property)
(coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
(coax.StochasticV property)
(coax.TransitionModel property)
(coax.V property)
partial_reduce() (coax.utils.MaxTree method)
(coax.utils.MinTree method)
(coax.utils.SegmentTree method)
(coax.utils.SumTree method)
Policy (class in coax)
pop() (coax.reward_tracing.MonteCarlo method)
(coax.reward_tracing.NStep method)
postprocess_variate() (coax.proba_dists.CategoricalDist method)
(coax.proba_dists.DiscretizedIntervalDist method)
(coax.proba_dists.EmpiricalQuantileDist method)
(coax.proba_dists.NormalDist method)
(coax.proba_dists.ProbaDist method)
(coax.proba_dists.SquashedNormalDist method)
PPOClip (class in coax.policy_objectives)
preprocess_variate() (coax.proba_dists.CategoricalDist method)
(coax.proba_dists.DiscretizedIntervalDist method)
(coax.proba_dists.EmpiricalQuantileDist method)
(coax.proba_dists.NormalDist method)
(coax.proba_dists.ProbaDist method)
(coax.proba_dists.SquashedNormalDist method)
pretty_print() (in module coax.utils)
pretty_repr() (in module coax.utils)
PrioritizedReplayBuffer (class in coax.experience_replay)
ProbaDist (class in coax.proba_dists)
Q
Q (class in coax)
QLearning (class in coax.td_learning)
quantile_cos_embedding() (in module coax.utils)
quantile_huber() (in module coax.value_losses)
quantiles() (in module coax.utils)
quantiles_uniform() (in module coax.utils)
R
RandomPolicy (class in coax)
record_metrics() (coax.wrappers.TrainMonitor method)
reload_recursive() (in module coax.utils)
render() (coax.envs.ConnectFourEnv method)
(coax.wrappers.BoxActionsToDiscrete method)
(coax.wrappers.BoxActionsToReals method)
(coax.wrappers.FrameStacking method)
(coax.wrappers.MetaPolicyEnv method)
(coax.wrappers.TrainMonitor method)
render_episode() (in module coax.utils)
render_mode (coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
reset() (coax.envs.ConnectFourEnv method)
(coax.reward_tracing.MonteCarlo method)
(coax.reward_tracing.NStep method)
(coax.utils.OrnsteinUhlenbeckNoise method)
(coax.wrappers.BoxActionsToDiscrete method)
(coax.wrappers.BoxActionsToReals method)
(coax.wrappers.FrameStacking method)
(coax.wrappers.MetaPolicyEnv method)
(coax.wrappers.TrainMonitor method)
reset_global() (coax.wrappers.TrainMonitor method)
reward_range (coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
RewardFunction (class in coax)
root_value (coax.utils.MaxTree property)
(coax.utils.MinTree property)
(coax.utils.SegmentTree property)
(coax.utils.SumTree property)
S
safe_sample() (in module coax.utils)
sample (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
sample() (coax.experience_replay.PrioritizedReplayBuffer method)
(coax.experience_replay.SimpleReplayBuffer method)
(coax.utils.SumTree method)
sample_func (coax.BoltzmannPolicy property)
(coax.EpsilonGreedy property)
(coax.Policy property)
(coax.StochasticV property)
sample_func_type1 (coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
sample_func_type2 (coax.StochasticQ property)
(coax.StochasticRewardFunction property)
(coax.StochasticTransitionModel property)
Sarsa (class in coax.td_learning)
save_counters() (coax.wrappers.TrainMonitor method)
SegmentTree (class in coax.utils)
set_counters() (coax.wrappers.TrainMonitor method)
set_state() (coax.Worker method)
set_values() (coax.utils.MaxTree method)
(coax.utils.MinTree method)
(coax.utils.SegmentTree method)
(coax.utils.SumTree method)
SimpleReplayBuffer (class in coax.experience_replay)
SimpleTD (class in coax.td_learning)
single_to_batch() (in module coax.utils)
soft_update() (coax.Policy method)
(coax.Q method)
(coax.RewardFunction method)
(coax.StochasticQ method)
(coax.StochasticRewardFunction method)
(coax.StochasticTransitionModel method)
(coax.StochasticV method)
(coax.TransitionModel method)
(coax.V method)
SoftClippedDoubleQLearning (class in coax.td_learning)
SoftPG (class in coax.policy_objectives)
SoftQLearning (class in coax.td_learning)
space (coax.proba_dists.CategoricalDist property)
(coax.proba_dists.DiscretizedIntervalDist property)
(coax.proba_dists.EmpiricalQuantileDist property)
(coax.proba_dists.NormalDist property)
(coax.proba_dists.ProbaDist property)
(coax.proba_dists.SquashedNormalDist property)
spec (coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
SquashedNormalDist (class in coax.proba_dists)
stack_trees() (in module coax.utils)
step() (coax.envs.ConnectFourEnv method)
(coax.wrappers.BoxActionsToDiscrete method)
(coax.wrappers.BoxActionsToReals method)
(coax.wrappers.FrameStacking method)
(coax.wrappers.MetaPolicyEnv method)
(coax.wrappers.TrainMonitor method)
StepwiseLinearFunction (class in coax.utils)
StochasticQ (class in coax)
StochasticRewardFunction (class in coax)
StochasticTransitionModel (class in coax)
StochasticV (class in coax)
SuccessorStateQ (class in coax)
SumTree (class in coax.utils)
sync_shared_params() (in module coax.utils)
T
target_func() (coax.td_learning.SoftClippedDoubleQLearning method)
td_error() (coax.td_learning.ClippedDoubleQLearning method)
(coax.td_learning.DoubleQLearning method)
(coax.td_learning.ExpectedSarsa method)
(coax.td_learning.QLearning method)
(coax.td_learning.Sarsa method)
(coax.td_learning.SimpleTD method)
(coax.td_learning.SoftClippedDoubleQLearning method)
(coax.td_learning.SoftQLearning method)
to_singles() (coax.reward_tracing.TransitionBatch method)
trace() (coax.Worker method)
TrainMonitor (class in coax.wrappers)
transform_func (coax.value_transforms.LogTransform property)
(coax.value_transforms.ValueTransform property)
TransitionBatch (class in coax.reward_tracing)
TransitionModel (class in coax)
tree_ravel() (in module coax.utils)
U
unvectorize() (in module coax.utils)
unwrapped (coax.envs.ConnectFourEnv property)
(coax.wrappers.BoxActionsToDiscrete property)
(coax.wrappers.BoxActionsToReals property)
(coax.wrappers.FrameStacking property)
(coax.wrappers.MetaPolicyEnv property)
(coax.wrappers.TrainMonitor property)
update() (coax.experience_replay.PrioritizedReplayBuffer method)
(coax.model_updaters.ModelUpdater method)
(coax.policy_objectives.DeterministicPG method)
(coax.policy_objectives.PPOClip method)
(coax.policy_objectives.SoftPG method)
(coax.policy_objectives.VanillaPG method)
(coax.td_learning.ClippedDoubleQLearning method)
(coax.td_learning.DoubleQLearning method)
(coax.td_learning.ExpectedSarsa method)
(coax.td_learning.QLearning method)
(coax.td_learning.Sarsa method)
(coax.td_learning.SimpleTD method)
(coax.td_learning.SoftClippedDoubleQLearning method)
(coax.td_learning.SoftQLearning method)
V
V (class in coax)
values (coax.utils.MaxTree property)
(coax.utils.MinTree property)
(coax.utils.SegmentTree property)
(coax.utils.SumTree property)
ValueTransform (class in coax.value_transforms)
VanillaPG (class in coax.policy_objectives)
W
Worker (class in coax)
Read the Docs
v: latest
Versions
latest
stable
Downloads
On Read the Docs
Project Home
Builds