TianShou
0.1
Tutorials:
DDPG (Deep Deterministic Policy Gradient) with TianShou
Make an Environment
Build the Networks
Construct Optimization Methods
Specify Data Acquisition
Start Training!
API Docs
tianshou.core.policy
Base class
Deterministic policy
Distributional policy
DQN policy
tianshou.core.value_function
Base class
State value
Action value
tianshou.core.losses
tianshou.core.opt
tianshou.core.random
tianshou.core.utils
tianshou.data.data_buffer
Base class
Batch set
Replay buffer base
Vanilla replay buffer
tianshou.data.advantage_estimation
tianshou.data.data_collector
tianshou.data.tester
TianShou
Docs » Index
Index
_ | A | B | C | D | E | F | G | N | O | P | Q | R | S | T | V
_
__call__() (tianshou.data.advantage_estimation.ddpg_return method)
(tianshou.data.advantage_estimation.nstep_q_return method)
(tianshou.data.advantage_estimation.nstep_return method)
A
act() (tianshou.core.policy.deterministic.Deterministic method)
(tianshou.core.policy.distributional.Distributional method)
(tianshou.core.policy.dqn.DQN method)
act_test() (tianshou.core.policy.deterministic.Deterministic method)
(tianshou.core.policy.distributional.Distributional method)
(tianshou.core.policy.dqn.DQN method)
ActionValue (class in tianshou.core.value_function.action_value)
add() (tianshou.data.data_buffer.batch_set.BatchSet method)
(tianshou.data.data_buffer.vanilla.VanillaReplayBuffer method)
B
BatchSet (class in tianshou.data.data_buffer.batch_set)
C
clear() (tianshou.data.data_buffer.batch_set.BatchSet method)
(tianshou.data.data_buffer.vanilla.VanillaReplayBuffer method)
collect() (tianshou.data.data_collector.DataCollector method)
D
DataCollector (class in tianshou.data.data_collector)
ddpg_return (class in tianshou.data.advantage_estimation)
denoise_action() (tianshou.data.data_collector.DataCollector method)
Deterministic (class in tianshou.core.policy.deterministic)
Distributional (class in tianshou.core.policy.distributional)
DPG() (in module tianshou.core.opt)
DQN (class in tianshou.core.policy.dqn)
(class in tianshou.core.value_function.action_value)
E
eval_action() (tianshou.core.policy.deterministic.Deterministic method)
eval_action_old() (tianshou.core.policy.deterministic.Deterministic method)
eval_value() (tianshou.core.value_function.action_value.ActionValue method)
(tianshou.core.value_function.action_value.DQN method)
(tianshou.core.value_function.state_value.StateValue method)
eval_value_all_actions() (tianshou.core.value_function.action_value.DQN method)
eval_value_all_actions_old() (tianshou.core.value_function.action_value.DQN method)
eval_value_old() (tianshou.core.value_function.action_value.ActionValue method)
(tianshou.core.value_function.action_value.DQN method)
(tianshou.core.value_function.state_value.StateValue method)
F
full_return() (in module tianshou.data.advantage_estimation)
G
GaussianWhiteNoiseProcess (class in tianshou.core.random)
get_soft_update_op() (in module tianshou.core.utils)
N
next_batch() (tianshou.data.data_collector.DataCollector method)
nstep_q_return (class in tianshou.data.advantage_estimation)
nstep_return (class in tianshou.data.advantage_estimation)
O
OrnsteinUhlenbeckProcess (class in tianshou.core.random)
P
ppo_clip() (in module tianshou.core.losses)
Q
q_net (tianshou.core.policy.dqn.DQN attribute)
R
REINFORCE() (in module tianshou.core.losses)
remove() (tianshou.data.data_buffer.vanilla.VanillaReplayBuffer method)
reset() (tianshou.core.policy.deterministic.Deterministic method)
(tianshou.core.policy.distributional.Distributional method)
(tianshou.core.policy.dqn.DQN method)
reset_states() (tianshou.core.random.OrnsteinUhlenbeckProcess method)
S
sample() (tianshou.core.random.GaussianWhiteNoiseProcess method)
(tianshou.core.random.OrnsteinUhlenbeckProcess method)
(tianshou.data.data_buffer.batch_set.BatchSet method)
(tianshou.data.data_buffer.vanilla.VanillaReplayBuffer method)
set_epsilon_test() (tianshou.core.policy.dqn.DQN method)
set_epsilon_train() (tianshou.core.policy.dqn.DQN method)
StateValue (class in tianshou.core.value_function.state_value)
statistics() (tianshou.data.data_buffer.batch_set.BatchSet method)
sync_weights() (tianshou.core.policy.deterministic.Deterministic method)
(tianshou.core.policy.distributional.Distributional method)
(tianshou.core.policy.dqn.DQN method)
(tianshou.core.value_function.action_value.ActionValue method)
(tianshou.core.value_function.action_value.DQN method)
(tianshou.core.value_function.state_value.StateValue method)
T
test_policy_in_env() (in module tianshou.data.tester)
tianshou.core.losses (module)
tianshou.core.opt (module)
tianshou.core.policy (module)
tianshou.core.policy.base (module)
tianshou.core.policy.deterministic (module)
tianshou.core.policy.distributional (module)
tianshou.core.policy.dqn (module)
tianshou.core.random (module)
tianshou.core.utils (module)
tianshou.core.value_function (module)
tianshou.core.value_function.action_value (module)
tianshou.core.value_function.base (module)
tianshou.core.value_function.state_value (module)
tianshou.data.advantage_estimation (module)
tianshou.data.data_buffer (module)
tianshou.data.data_buffer.base (module)
tianshou.data.data_buffer.batch_set (module)
tianshou.data.data_buffer.replay_buffer_base (module)
tianshou.data.data_buffer.vanilla (module)
tianshou.data.data_collector (module)
tianshou.data.tester (module)
trainable_variables (tianshou.core.policy.deterministic.Deterministic attribute)
(tianshou.core.policy.distributional.Distributional attribute)
(tianshou.core.value_function.action_value.ActionValue attribute)
(tianshou.core.value_function.action_value.DQN attribute)
(tianshou.core.value_function.state_value.StateValue attribute)
V
value_mse() (in module tianshou.core.losses)
value_tensor (tianshou.core.value_function.action_value.ActionValue attribute)
(tianshou.core.value_function.action_value.DQN attribute)
(tianshou.core.value_function.state_value.StateValue attribute)
value_tensor_all_actions (tianshou.core.value_function.action_value.DQN attribute)
VanillaReplayBuffer (class in tianshou.data.data_buffer.vanilla)