PyTorch PINN: Inaccurate sigma_xx (stress) predictions while other parameters match FEM results

I’m implementing a Physics-Informed Neural Network (PINN) to solve a 2D elasticity problem, comparing results with FEM analysis. While most parameters (ux, uy, sigma_yy, sigma_xy) closely match the FEM results, the sigma_xx predictions are significantly off.
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.path import Path
import pandas as pd
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
import copy
# Set random seed for reproducibility
torch.manual_seed(0)
np.random.seed(0)

# Check if GPU is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Set default tensor type to float32
torch.set_default_tensor_type(torch.FloatTensor)

# Define the neural network architecture
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.hidden1 = nn.Linear(2, 64)
        self.hidden2 = nn.Linear(64, 128)
        self.hidden3 = nn.Linear(128, 256)
        self.hidden4 = nn.Linear(256, 128)  
        self.hidden5 = nn.Linear(128, 64)
        self.output = nn.Linear(64, 2)
    def forward(self, x):
        x = torch.tanh(self.hidden1(x))
        x = torch.tanh(self.hidden2(x))
        x = torch.tanh(self.hidden3(x))
        x = torch.tanh(self.hidden4(x))
        x = torch.tanh(self.hidden5(x))
        return self.output(x)  # return [u, v]
    # Define the problem domain using the given vertices
vertices = np.array([
    [0, 0],
    [0, 3],
    [3, 3],
    [5, 2],
    [7, 2],
    [7,0],
    [0,0]  # Closing the polygon
],dtype=np.float32)
path = Path(vertices)
def in_domain(x, y):
    points = np.column_stack((x.cpu().numpy(), y.cpu().numpy()))
    return torch.tensor(path.contains_points(points), dtype=torch.bool, device=device)

# Define boundary conditions
def BC_bottom(x, y):
    return ((y == 0) & (x >= 0) & (x <= 7)).squeeze()

def BC_left(x, y):
    return ((x == 0) & (y >= 0) & (y <= 3)).squeeze()

def BC_top(x, y):
    return (((y == 3) & (x >= 0) & (x <= 3)) | 
            ((y - 4.5) * -2 == (x - 0)) & (x > 3) & (x < 5) |
            (y == 2) & (x >= 5) & (x <= 7)).squeeze()

def BC_right(x, y):
    return ((x == 7) & (y >= 0) & (y <= 2)).squeeze()

def BC_Slope(x, y):
    return (((y - 4.5) * -2 == (x - 0)) & (x > 3) & (x < 5)).squeeze()


class HuberLoss:
    def __init__(self, delta=1.0):
        self.delta = delta
        
    def __call__(self, y_true, y_pred):
        error = torch.abs(y_true - y_pred)
        quadratic = torch.min(error, torch.tensor(self.delta))
        linear = error - quadratic
        return torch.mean(0.5 * quadratic ** 2 + self.delta * linear)

class MultiViewPINN:
    def __init__(self):
        # Keep track of learned knowledge from different views
        self.view_knowledge = {}
        self.huber = HuberLoss(delta=1.0)
        
    def transfer_knowledge(self, source_net, target_net, layer_mapping):
        """Transfer weights from source network to target network"""
        for source_layer, target_layer in layer_mapping:
            target_net.state_dict()[target_layer].data.copy_(
                source_net.state_dict()[source_layer].data
            )

def BC(xy, net):
    x, y = xy[:, 0].unsqueeze(1), xy[:, 1].unsqueeze(1)
    
    output = net(xy)
    u = output[:, 0:1]  # แยก output channel แรกเป็น u
    v = output[:, 1:2]  # แยก output channel ที่สองเป็น v
    
    bc_b = BC_bottom(x, y)
    bc_l = BC_left(x, y)
    bc_t = BC_top(x, y)
    bc_r = BC_right(x, y)
    bc_slope = BC_Slope(x, y)
    
    huber = HuberLoss(delta=1.0)
    
    loss = huber(u[bc_b], torch.zeros_like(u[bc_b])) + huber(v[bc_b], torch.zeros_like(v[bc_b]))
    loss += huber(u[bc_l], torch.zeros_like(u[bc_l]))
    loss += huber(u[bc_r], torch.zeros_like(u[bc_r]))
    
    xy_top = xy[bc_t].requires_grad_(True)
    output_top = net(xy_top)
    u_top = output_top[:, 0:1]
    v_top = output_top[:, 1:2]
    
    u_x_top = torch.autograd.grad(u_top.sum(), xy_top, create_graph=True)[0][:, 0]
    u_y_top = torch.autograd.grad(u_top.sum(), xy_top, create_graph=True)[0][:, 1]
    v_x_top = torch.autograd.grad(v_top.sum(), xy_top, create_graph=True)[0][:, 0]
    v_y_top = torch.autograd.grad(v_top.sum(), xy_top, create_graph=True)[0][:, 1]
    
    E = 5.0  
    nu = 0.3 
    
    sigma_xx_top = (E / ((1 + nu) * (1 - 2 * nu))) * (u_x_top + nu * v_y_top)
    sigma_yy_top = (E / ((1 + nu) * (1 - 2 * nu))) * (v_y_top + nu * u_x_top)
    sigma_xy_top = E / (2 * (1 + nu)) * (u_y_top + v_x_top)
    
    loss += huber(sigma_xy_top, torch.zeros_like(sigma_xy_top))
    loss += huber(sigma_xx_top, torch.zeros_like(sigma_xx_top))
    loss += huber(sigma_yy_top, torch.zeros_like(sigma_yy_top))
    
    xy_right = xy[bc_r].requires_grad_(True)
    output_right = net(xy_right)
    u_right = output_right[:, 0:1]
    v_right = output_right[:, 1:2]
    
    u_y_right = torch.autograd.grad(u_right.sum(), xy_right, create_graph=True)[0][:, 1]
    v_x_right = torch.autograd.grad(v_right.sum(), xy_right, create_graph=True)[0][:, 0]
    
    sigma_xy_right = E / (2 * (1 + nu)) * (u_y_right + v_x_right)
    loss += huber(sigma_xy_right, torch.zeros_like(sigma_xy_right))

    xy_left = xy[bc_l].requires_grad_(True)
    output_left = net(xy_left)
    u_left = output_left[:, 0:1]
    v_left = output_left[:, 1:2]
    
    u_y_left = torch.autograd.grad(u_left.sum(), xy_left, create_graph=True)[0][:, 1]
    v_x_left = torch.autograd.grad(v_left.sum(), xy_left, create_graph=True)[0][:, 0]
    
    sigma_xy_left = E / (2 * (1 + nu)) * (u_y_left + v_x_left)
    loss += huber(sigma_xy_left, torch.zeros_like(sigma_xy_left))

    return loss


def load_fem_data(filename="FEM2_data.csv"):
    df = pd.read_csv(filename)
    x = torch.tensor(df['X'].values, dtype=torch.float32).unsqueeze(1).to(device)
    y = torch.tensor(df['Y'].values, dtype=torch.float32).unsqueeze(1).to(device)
    return x, y


def generate_training_data(fem_data_file, n_boundary=2000):
    x, y = load_fem_data(fem_data_file)
    
    # Keep only points inside the domain
    mask = in_domain(x, y)
    x, y = x[mask], y[mask]
    
    # Generate boundary points
    t = torch.linspace(0, 1, n_boundary, device=device).unsqueeze(1)
    
    # Define the boundary segments
    segments = [
        ([0, 0, 0], [0, 3, 3]),  # Left boundary
        ([0, 3, 5, 7, 7], [3, 3, 2, 2, 0]),  # Top and right boundary
        ([7, 0], [0, 0])  # Bottom boundary
    ]
    
    x_b = []
    y_b = []
    
    for segment in segments:
        x_seg = torch.tensor(np.interp(t.cpu().numpy(), np.linspace(0, 1, len(segment[0])), segment[0]), dtype=torch.float32, device=device)
        y_seg = torch.tensor(np.interp(t.cpu().numpy(), np.linspace(0, 1, len(segment[1])), segment[1]), dtype=torch.float32, device=device)
        x_b.append(x_seg)
        y_b.append(y_seg)
    
    x_b = torch.cat(x_b)
    y_b = torch.cat(y_b)
    
    return x, y, x_b, y_b


def PDE(x, y, net):
    xy = torch.cat([x, y], dim=1)
    xy.requires_grad = True
    
    output = net(xy)
    u = output[:, 0:1]
    v = output[:, 1:2]
    
    u_x = torch.autograd.grad(u.sum(), xy, create_graph=True)[0][:, 0].unsqueeze(1)
    u_y = torch.autograd.grad(u.sum(), xy, create_graph=True)[0][:, 1].unsqueeze(1)
    v_x = torch.autograd.grad(v.sum(), xy, create_graph=True)[0][:, 0].unsqueeze(1)
    v_y = torch.autograd.grad(v.sum(), xy, create_graph=True)[0][:, 1].unsqueeze(1)
    
    E = 5
    nu = 0.3
    gamma = 1.8
    
    sigma_xx = E / (1 - nu**2) * (u_x + nu * v_y)
    sigma_yy = E / (1 - nu**2) * (v_y + nu * u_x)
    sigma_xy = E / (2 * (1 + nu)) * (u_y + v_x)
    
    f_x = torch.zeros_like(x)
    f_y = -gamma * torch.ones_like(y)
    
    R_x = torch.autograd.grad(sigma_xx.sum(), xy, create_graph=True)[0][:, 0].unsqueeze(1) + \
          torch.autograd.grad(sigma_xy.sum(), xy, create_graph=True)[0][:, 1].unsqueeze(1) + f_x
    R_y = torch.autograd.grad(sigma_xy.sum(), xy, create_graph=True)[0][:, 0].unsqueeze(1) + \
          torch.autograd.grad(sigma_yy.sum(), xy, create_graph=True)[0][:, 1].unsqueeze(1) + f_y
    
    huber = HuberLoss(delta=1.0)
    loss_x = huber(R_x, torch.zeros_like(R_x))
    loss_y = huber(R_y, torch.zeros_like(R_y))
    
    return loss_x, loss_y


def train(net, optimizer, n_epochs, fem_data_file):
    multi_view = MultiViewPINN()
    huber = HuberLoss(delta=1.0)
    
    scheduler = CosineAnnealingLR(optimizer, T_max=n_epochs, eta_min=1e-6)
    
    best_loss = float('inf')
    patience = 0
    max_patience = 3000
    
    prev_net = None
    
    for epoch in range(n_epochs):
        optimizer.zero_grad()
        
        x, y, x_b, y_b = generate_training_data(fem_data_file, 30000)
        xy = torch.cat([x, y], dim=1)
        xy_b = torch.cat([x_b, y_b], dim=1)
        
        if prev_net is not None:
            layer_mapping = [
                ('hidden1.weight', 'hidden1.weight'),
                ('hidden2.weight', 'hidden2.weight'),
                ('hidden3.weight', 'hidden3.weight')
            ]
            multi_view.transfer_knowledge(prev_net, net, layer_mapping)
        
        loss_pde_x, loss_pde_y = PDE(x, y, net)
        loss_bc = BC(xy_b, net)
        
        loss = loss_pde_x + 2 * loss_pde_y + loss_bc
        
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(net.parameters(), max_norm=0.5)
        
        optimizer.step()
        scheduler.step()
        
        if loss < best_loss:
            best_loss = loss
            patience = 0
            
            prev_net = copy.deepcopy(net)
            
            torch.save({
                'net_state_dict': net.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'scheduler_state_dict': scheduler.state_dict(),
                'loss': loss,
                'epoch': epoch,
            }, 'best_model.pth')
        else:
            patience += 1
            if patience > max_patience:
                print(f"Early stopping at epoch {epoch}")
                break
                
        if (epoch + 1) % 10 == 0:
            print(f'Epoch {epoch+1}/{n_epochs}, Loss: {loss.item():.6f}, '
                  f'PDE_x: {loss_pde_x.item():.6f}, PDE_y: {loss_pde_y.item():.6f}, '
                  f'BC: {loss_bc.item():.6f}, LR: {scheduler.get_last_lr()[0]:.6f}')
[Stress yy comparison with FEM]
Stress xx comparison with FEM
I’ve tried several approaches to resolve the sigma_xx accuracy issue:
Adjusted the network architecture (tried different layer sizes and depths)
Increased the number of training points near boundaries
Implemented Huber loss instead of MSE for better stability
sigma_xx values should be in a similar range to FEM results
Other parameters (ux, uy, sigma_yy, sigma_xy) match FEM within ~5% error
You need to sign in to view this answers
Related Post