OiO.lk Community platform!

Oio.lk is an excellent forum for developers, providing a wide range of resources, discussions, and support for those in the developer community. Join oio.lk today to connect with like-minded professionals, share insights, and stay updated on the latest trends and technologies in the development field.
  You need to log in or register to access the solved answers to this problem.
  • You have reached the maximum number of guest views allowed
  • Please register below to remove this limitation

ValueError: Not enough values to unpack in an RL environment using Gymnasium

  • Thread starter Thread starter Paarth Jha
  • Start date Start date
P

Paarth Jha

Guest
Code:
from TTset_main import load_TTset

# File paths for training and validation data
filted_tset_path = "path.csv"
filted_vset_path = "path.csv"

# Load training and validation sets
TTset_training, TTset_vali, training_data_length = load_TTset(filted_tset_path, filted_vset_path)

# Extract frame tensors and labels from the training set
frame_tensors = []
labels = []

for inputs, targets in TTset_training:
    frame_tensors.append(inputs)
    labels.append(targets)

# Print dimensions of the frame tensors and labels
print("Inputs")
print("Dimension 1: ", len(frame_tensors)) #1927
print("Dimension 2: ", len(frame_tensors[0])) #1
print("Dimension 3: ", len(frame_tensors[0][0])) #3
print("Dimension 4: ", len(frame_tensors[0][0][0])) #60
print("Dimension 5: ", len(frame_tensors[0][0][0][0])) #230
print("Dimension 6: ", len(frame_tensors[0][0][0][0][0])) #230
print()
print("Training data length:", training_data_length)
print("Targets")
print("Dimension 1: ", len(labels)) #1927
print("Dimension 2: ", len(labels[0])) #1

import numpy as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
from tqdm import tqdm

# Custom gym environment for table tennis
class TableTennisEnv(gym.Env):
    def __init__(self, frame_tensors, labels, frame_size=(3, 60, 230, 230)):
        super(TableTennisEnv, self).__init__()
        self.frame_tensors = frame_tensors
        self.labels = labels
        self.current_step = 0
        self.current_substep = 0
        self.frame_size = frame_size

        self.n_actions = 20  # Number of unique actions
        self.observation_space = spaces.Box(low=0, high=255, shape=(3, 60, 230, 230), dtype=np.float32)
        self.action_space = spaces.Discrete(self.n_actions)
        self.normalize_images = False

    def reset(self, seed=None):
        self.current_step = 0
        self.current_substep = 0
        return self.frame_tensors[self.current_step][self.current_substep], {}

    def step(self, action):
        reward = 1 if action == self.labels[self.current_step][self.current_substep] else -1
        self.current_substep += 1
        
        if self.current_substep >= len(self.frame_tensors[self.current_step]):
            self.current_substep = 0
            self.current_step += 1
        
        done = self.current_step >= len(self.frame_tensors)
        
        obs = self.frame_tensors[self.current_step][self.current_substep] if not done else np.zeros_like(self.frame_tensors[0][0])
        
        return obs, reward, done, {}

    def render(self, mode='human'):
        pass

# Reduce memory usage by processing in smaller batches
env = DummyVecEnv([lambda: TableTennisEnv(frame_tensors, labels, frame_size=(3, 60, 230, 230))])

# Callback for progress bar during training
class ProgressBarCallback(BaseCallback):
    def __init__(self, total_timesteps, verbose=0):
        super(ProgressBarCallback, self).__init__(verbose)
        self.total_timesteps = total_timesteps
        self.pbar = None

    def _on_training_start(self):
        self.pbar = tqdm(total=self.total_timesteps)

    def _on_step(self):
        self.pbar.update(self.model.n_steps)
        return True

    def _on_training_end(self):
        self.pbar.close()

# Set total timesteps for training
total_timesteps = 2048  # Adjust timesteps as needed

# Initialize PPO model with a smaller batch size
model = PPO("MlpPolicy", env, verbose=1, n_steps=64, tensorboard_log="./ppo_tt_tensorboard/")  # Reduce batch size
progress_bar_callback = ProgressBarCallback(total_timesteps=total_timesteps)

# Train the model
model.learn(total_timesteps=total_timesteps, callback=progress_bar_callback)

# Save the trained model
model.save("ppo_table_tennis")

I'm using the above code to train an RL model for classifying videos. The videos are input as tensors. The frame_tensors has dimensions 1927x1x3x60x230x230, and the labels has dimensions 1927x1. A single video is represented in 3x60x230x230 dimensions. I'm getting the following error:

Code:
Logging to ./ppo_tt_tensorboard/PPO_5
  0%|                                                                                                                                                                                 | 0/2048 [00:00<?, ?it/s]Traceback (most recent call last):
  File "d:/PJ/TTARnet_v2_to_lab_兆仁code/prototype.py", line 105, in <module>
    model.learn(total_timesteps=total_timesteps, callback=progress_bar_callback)
  File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\ppo\ppo.py", line 315, in learn
    return super().learn(
  File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py", line 300, in learn
    continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
  File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py", line 195, in collect_rollouts
    new_obs, rewards, dones, infos = env.step(clipped_actions)
  File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\vec_env\base_vec_env.py", line 206, in step
    return self.step_wait()
  File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 58, in step_wait
    obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step(
ValueError: not enough values to unpack (expected 5, got 4)

This is my first time using the gymnasium library so I'm not very familiar with it, apologies. I asked a few colleagues about this error, and they suggested that my 'step' function is only returning 4 values, it should return 5 and the 5th value is usually a boolean. I tried that but I still got the same error.

I asked a few colleagues about this error, and they suggested that my 'step' function is only returning 4 values, it should return 5 and the 5th value is usually a boolean. I tried that but I still got the same error.
<pre><code>from TTset_main import load_TTset

# File paths for training and validation data
filted_tset_path = "path.csv"
filted_vset_path = "path.csv"

# Load training and validation sets
TTset_training, TTset_vali, training_data_length = load_TTset(filted_tset_path, filted_vset_path)

# Extract frame tensors and labels from the training set
frame_tensors = []
labels = []

for inputs, targets in TTset_training:
frame_tensors.append(inputs)
labels.append(targets)

# Print dimensions of the frame tensors and labels
print("Inputs")
print("Dimension 1: ", len(frame_tensors)) #1927
print("Dimension 2: ", len(frame_tensors[0])) #1
print("Dimension 3: ", len(frame_tensors[0][0])) #3
print("Dimension 4: ", len(frame_tensors[0][0][0])) #60
print("Dimension 5: ", len(frame_tensors[0][0][0][0])) #230
print("Dimension 6: ", len(frame_tensors[0][0][0][0][0])) #230
print()
print("Training data length:", training_data_length)
print("Targets")
print("Dimension 1: ", len(labels)) #1927
print("Dimension 2: ", len(labels[0])) #1

import numpy as np
import gymnasium as gym
from gymnasium import spaces
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.callbacks import BaseCallback
from tqdm import tqdm

# Custom gym environment for table tennis
class TableTennisEnv(gym.Env):
def __init__(self, frame_tensors, labels, frame_size=(3, 60, 230, 230)):
super(TableTennisEnv, self).__init__()
self.frame_tensors = frame_tensors
self.labels = labels
self.current_step = 0
self.current_substep = 0
self.frame_size = frame_size

self.n_actions = 20 # Number of unique actions
self.observation_space = spaces.Box(low=0, high=255, shape=(3, 60, 230, 230), dtype=np.float32)
self.action_space = spaces.Discrete(self.n_actions)
self.normalize_images = False

def reset(self, seed=None):
self.current_step = 0
self.current_substep = 0
return self.frame_tensors[self.current_step][self.current_substep], {}

def step(self, action):
reward = 1 if action == self.labels[self.current_step][self.current_substep] else -1
self.current_substep += 1

if self.current_substep >= len(self.frame_tensors[self.current_step]):
self.current_substep = 0
self.current_step += 1

done = self.current_step >= len(self.frame_tensors)

obs = self.frame_tensors[self.current_step][self.current_substep] if not done else np.zeros_like(self.frame_tensors[0][0])

return obs, reward, done, {}

def render(self, mode='human'):
pass

# Reduce memory usage by processing in smaller batches
env = DummyVecEnv([lambda: TableTennisEnv(frame_tensors, labels, frame_size=(3, 60, 230, 230))])

# Callback for progress bar during training
class ProgressBarCallback(BaseCallback):
def __init__(self, total_timesteps, verbose=0):
super(ProgressBarCallback, self).__init__(verbose)
self.total_timesteps = total_timesteps
self.pbar = None

def _on_training_start(self):
self.pbar = tqdm(total=self.total_timesteps)

def _on_step(self):
self.pbar.update(self.model.n_steps)
return True

def _on_training_end(self):
self.pbar.close()

# Set total timesteps for training
total_timesteps = 2048 # Adjust timesteps as needed

# Initialize PPO model with a smaller batch size
model = PPO("MlpPolicy", env, verbose=1, n_steps=64, tensorboard_log="./ppo_tt_tensorboard/") # Reduce batch size
progress_bar_callback = ProgressBarCallback(total_timesteps=total_timesteps)

# Train the model
model.learn(total_timesteps=total_timesteps, callback=progress_bar_callback)

# Save the trained model
model.save("ppo_table_tennis")

</code></pre>
<p>I'm using the above code to train an RL model for classifying videos. The videos are input as tensors. The frame_tensors has dimensions 1927x1x3x60x230x230, and the labels has dimensions 1927x1. A single video is represented in 3x60x230x230 dimensions. I'm getting the following error:</p>
<pre><code>Logging to ./ppo_tt_tensorboard/PPO_5
0%| | 0/2048 [00:00<?, ?it/s]Traceback (most recent call last):
File "d:/PJ/TTARnet_v2_to_lab_兆仁code/prototype.py", line 105, in <module>
model.learn(total_timesteps=total_timesteps, callback=progress_bar_callback)
File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\ppo\ppo.py", line 315, in learn
return super().learn(
File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py", line 300, in learn
continue_training = self.collect_rollouts(self.env, callback, self.rollout_buffer, n_rollout_steps=self.n_steps)
File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\on_policy_algorithm.py", line 195, in collect_rollouts
new_obs, rewards, dones, infos = env.step(clipped_actions)
File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\vec_env\base_vec_env.py", line 206, in step
return self.step_wait()
File "C:\Users\User1\anaconda3\envs\ML\lib\site-packages\stable_baselines3\common\vec_env\dummy_vec_env.py", line 58, in step_wait
obs, self.buf_rews[env_idx], terminated, truncated, self.buf_infos[env_idx] = self.envs[env_idx].step(
ValueError: not enough values to unpack (expected 5, got 4)
</code></pre>
<p>This is my first time using the gymnasium library so I'm not very familiar with it, apologies. I asked a few colleagues about this error, and they suggested that my 'step' function is only returning 4 values, it should return 5 and the 5th value is usually a boolean. I tried that but I still got the same error.</p>
<p>I asked a few colleagues about this error, and they suggested that my 'step' function is only returning 4 values, it should return 5 and the 5th value is usually a boolean. I tried that but I still got the same error.</p>
 

Latest posts

Top