next.js: [Error 500] "Socket Hang Up" Randomly Occurring on any Routes in Production Mode
Verify canary release
- I verified that the issue exists in the latest Next.js canary release
Provide environment information
Operating System:
Platform: darwin
Arch: x64
Version: Darwin Kernel Version 21.6.0: Mon Aug 22 20:17:10 PDT 2022; root:xnu-8020.140.49~2/RELEASE_X86_64
Binaries:
Node: 16.14.2
npm: 8.5.0
Yarn: 1.22.15
pnpm: 6.11.0
Relevant packages:
next: 13.4.6
eslint-config-next: 13.4.2
react: 18.2.0
react-dom: 18.2.0
typescript: 4.9.5
Which area(s) of Next.js are affected? (leave empty if unsure)
No response
Link to the code that reproduces this issue or a replay of the bug
Not possible — the code is confidential.
To Reproduce
This is our package.json:
`{
"name": "********",
"version": "0.1.0",
"private": true,
"scripts": {
"dev": "next dev",
"dev-https": "NODE_TLS_REJECT_UNAUTHORIZED='0' node server.js",
"ngrok": "ngrok http https://localhost:3000",
"build": "next build",
"postbuild": "next-sitemap",
"start": "next start",
"clean": "rimraf .next out",
"lint": "next lint",
"lint.fix": "next lint --fix",
"test": "jest --watch",
"prepare": "husky install",
"analyze": "ANALYZE=true next build"
},
"dependencies": {
"@everipedia/wagmi-magic-connector": "^0.12.1",
"@headlessui/react": "^1.7.15",
"@headlessui/tailwindcss": "^0.1.3",
"@heroicons/react": "^1.0.6",
"@next/bundle-analyzer": "^12.2.0",
"@next/env": "^13.1.5",
"@radix-ui/react-dropdown-menu": "^2.0.5",
"@rainbow-me/rainbowkit": "^0.12.15",
"@ramp-network/ramp-instant-sdk": "^4.0.2",
"@react-spring/web": "^9.6.1",
"@react-three/cannon": "^6.4.0",
"@react-three/drei": "^9.34.3",
"@react-three/fiber": "^8.8.10",
"@segment/analytics-next": "^1.52.0",
"@sentry/nextjs": "^7.54.0",
"@stripe/react-stripe-js": "^1.16.3",
"@stripe/stripe-js": "^1.46.0",
"@tanstack/react-table": "^8.5.13",
"@use-gesture/react": "^10.2.19",
"axios": "^1.4.0",
"clsx": "^1.2.1",
"cookies-next": "^2.1.1",
"date-fns": "^2.29.3",
"ethers": "^5.7.1",
"i18next": "^22.4.9",
"next": "^13.4.6",
"next-auth": "^4.21.1",
"next-axiom": "^0.17.0",
"next-i18next": "^11.3.0",
"next-password-protect": "^1.8.0",
"next-share": "^0.18.2",
"next-sitemap": "^3.1.47",
"nextjs-progressbar": "^0.0.14",
"react": "^18.2.0",
"react-canvas-confetti": "^1.3.0",
"react-countup": "^6.4.0",
"react-csv": "^2.2.2",
"react-currency-input-field": "^3.6.10",
"react-device-detect": "^2.2.3",
"react-div-100vh": "^0.7.0",
"react-dom": "^18.2.0",
"react-fast-marquee": "^1.3.5",
"react-hook-form": "^7.41.5",
"react-hot-toast": "^2.4.0",
"react-i18next": "^12.1.4",
"react-icons": "^4.8.0",
"react-infinite-scroll-component": "^6.1.0",
"react-intersection-observer": "^9.4.1",
"react-spring-bottom-sheet": "^3.5.0-alpha.0",
"react-type-animation": "^2.1.1",
"react-use-intercom": "^3.0.2",
"recharts": "2.5.0",
"sharp": "^0.30.7",
"swiper": "^9.1.1",
"swr": "1.3.0",
"tailwind-merge": "^1.13.1",
"tailwind-scrollbar": "^3.0.0",
"tailwind-scrollbar-hide": "^1.1.7",
"tailwindcss": "^3.1.4",
"three": "^0.144.0",
"uuid": "^9.0.0",
"wagmi": "^0.12.12"
},
"devDependencies": {
"@commitlint/cli": "^17.0.3",
"@commitlint/config-conventional": "^17.3.0",
"@testing-library/jest-dom": "^5.16.4",
"@testing-library/react": "^13.3.0",
"@types/jest": "^28.1.4",
"@types/node": "18.0.0",
"@types/react": "18.0.14",
"@types/react-csv": "^1.1.3",
"@types/react-dom": "18.0.5",
"@types/react-stripe-elements": "^6.0.6",
"@types/three": "^0.143.0",
"@types/uuid": "^8.3.4",
"@typescript-eslint/eslint-plugin": "^5.30.0",
"@typescript-eslint/parser": "^5.30.0",
"autoprefixer": "^10.4.7",
"commitizen": "^4.2.6",
"commitlint": "^11.0.0",
"commitlint-config-gitmoji": "2.2.5",
"cssnano": "^5.1.12",
"cz-conventional-changelog": "^3.3.0",
"eslint": "8.18.0",
"eslint-config-airbnb-base": "^15.0.0",
"eslint-config-airbnb-typescript": "^17.0.0",
"eslint-config-next": "^13.3.0",
"eslint-config-prettier": "^8.5.0",
"eslint-plugin-import": "^2.26.0",
"eslint-plugin-jsx-a11y": "^6.6.0",
"eslint-plugin-prettier": "^4.1.0",
"eslint-plugin-react": "^7.30.1",
"eslint-plugin-react-hooks": "^4.6.0",
"eslint-plugin-simple-import-sort": "^7.0.0",
"eslint-plugin-tailwindcss": "^3.6.0",
"eslint-plugin-unused-imports": "^2.0.0",
"husky": "^8.0.0",
"jest": "^28.1.2",
"jest-environment-jsdom": "^28.1.2",
"lint-staged": "^13.0.3",
"postcss": "^8.4.14",
"prettier": "^2.7.1",
"rimraf": "^3.0.2",
"typescript": "^4.9.5"
},
"config": {
"commitizen": {
"path": "./node_modules/cz-conventional-changelog"
}
}
}`
Our next.config.js:
`/** @type {import('next').NextConfig} */
const { withSentryConfig } = require('@sentry/nextjs');
const { withAxiom } = require('next-axiom');
const withBundleAnalyzer = require('@next/bundle-analyzer')({
enabled: process.env.ANALYZE === 'true',
});
const { i18n } = require('./next-i18next.config');
// True only when the public environment name marks this deployment as staging.
const IS_PROTECTED = process.env.NEXT_PUBLIC_NODE_ENV === 'staging';

// Security-related response headers, written as a header-name -> value map
// and expanded into the `{ key, value }` entries consumed by `headers()`.
const securityHeaders = Object.entries({
  'X-XSS-Protection': '1; mode=block',
  'X-Content-Type-Options': 'nosniff',
  'Referrer-Policy': 'origin-when-cross-origin',
  'X-DNS-Prefetch-Control': 'on',
  'Strict-Transport-Security': 'max-age=63072000; includeSubDomains; preload',
}).map(([key, value]) => ({ key, value }));
// Legacy short URLs paired with the canonical route each one permanently
// redirects to: [source, destination].
const PERMANENT_REDIRECTS = [
  ['/login', '/auth/login'],
  ['/signup', '/auth/signup'],
  ['/dashboard', '/users/dashboard'],
  ['/backstage', '/artists/backstage'],
  ['/explore', '/search'],
  ['/faqs', '/faq'],
  ['/users/reward-tasks', '/users/game/explain'],
];

// Plain Next.js options, before the Axiom and bundle-analyzer wrappers are applied.
const baseNextConfig = {
  reactStrictMode: true,
  swcMinify: false,
  i18n,
  env: { PASSWORD_PROTECT: IS_PROTECTED },
  images: { domains: ['lh3.googleusercontent.com', 'i.scdn.co'] },
  sentry: {
    widenClientFileUpload: true,
    hideSourceMaps: true,
    automaticVercelMonitors: false,
  },
  // transpilePackages: ['react-native'],

  // Expands the redirect table into permanent redirect entries.
  redirects: async () =>
    PERMANENT_REDIRECTS.map(([source, destination]) => ({
      source,
      destination,
      permanent: true,
    })),

  // Applies the shared security headers to every path and sets a JSON content
  // type on the Apple merchant-id domain association file.
  headers: async () => {
    const everyRoute = { source: '/:path*', headers: securityHeaders };
    const appleMerchantFile = {
      source: '/.well-known/apple-developer-merchantid-domain-association',
      headers: [{ key: 'Content-Type', value: 'application/json' }],
    };
    return [everyRoute, appleMerchantFile];
  },

  // Registers a file-loader rule for `.pdf` files and returns the same
  // (mutated) webpack config object.
  webpack(config) {
    const pdfRule = {
      test: /\.pdf$/,
      use: {
        loader: 'file-loader',
        options: { name: '[path][name].[ext]' },
      },
    };
    config.module.rules.push(pdfRule);
    // config.externals.push('react-native');
    return config;
  },
};

const nextConfig = withAxiom(withBundleAnalyzer(baseNextConfig));
const sentryWebpackPluginOptions = {
org: '*****-*****',
project: '*****-nextjs',
silent: true, // Suppresses all logs
// For all available options, see:
// https://github.com/getsentry/sentry-webpack-plugin#options.
};
module.exports = withSentryConfig(nextConfig, sentryWebpackPluginOptions);
Our middleware.ts:
/* eslint-disable consistent-return */
import type { NextRequest } from 'next/server';
import { NextResponse } from 'next/server';
import { withAuth } from 'next-auth/middleware';
/** Roles accepted by the `authorized` callback and by the artist-area check. */
const ROLES_ALLOWED_TO_AUTH = new Set<any>(['artist', 'user']);

/** Path prefixes of the artist-only area. */
const ARTIST_ONLY_PREFIXES = ['/artists/backstage', '/artists/onboarding', '/artists/new'];

/**
 * Role-aware middleware wrapped by next-auth's `withAuth`.
 *
 * For requests under an artist-only prefix:
 *   - a token whose role is not in ROLES_ALLOWED_TO_AUTH is redirected to `/auth/login`;
 *   - a `user` role is redirected to `/users/dashboard`;
 *   - an `artist` role continues with `NextResponse.next()`.
 * Every other request returns `undefined`.
 */
export default withAuth(
  function middleware(req: NextRequest & { nextauth: { token: any } }) {
    const { pathname } = req.nextUrl;
    const userRole = req.nextauth.token?.userRole;

    const isArtistArea = ARTIST_ONLY_PREFIXES.some((prefix) => pathname.startsWith(prefix));
    if (!isArtistArea) {
      return undefined;
    }

    // Redirect if they don't have the appropriate role
    if (!ROLES_ALLOWED_TO_AUTH.has(userRole)) {
      return NextResponse.redirect(new URL('/auth/login', req.url));
    }
    // A role equal to 'user' can never also equal 'artist', so one comparison is enough.
    if (userRole === 'user') {
      return NextResponse.redirect(new URL('/users/dashboard', req.url));
    }
    if (userRole === 'artist') {
      return NextResponse.next();
    }
    return undefined;
  },
  {
    callbacks: {
      // Only tokens carrying one of the allowed roles count as authorized.
      authorized: ({ token }) =>
        token?.userRole !== undefined && ROLES_ALLOWED_TO_AUTH.has(token.userRole),
    },
  }
);
// Middleware configuration: `matcher` lists the path patterns this middleware
// file is scoped to (the feed, the artist areas and the user areas).
// NOTE(review): Next.js is understood to read `config.matcher` through static
// analysis at build time, so these should stay plain string literals — confirm
// against the Next.js middleware documentation before computing them.
export const config = {
matcher: [
'/feed',
'/artists/new/:path*',
'/artists/backstage/:path*',
'/artists/onboarding/:path*',
'/users/dashboard/:path*',
'/users/game/:path*',
'/users/settings',
],
};
Describe the Bug
We are experiencing a bug that occurs randomly for some of our users, only in production, on any route of the site, and it has never been reported on Sentry. We can only see it in the Vercel logs.
The full error message is as follows:
Uncaught Exception {"errorType":"Error","errorMessage":"socket hang up","code":"ECONNRESET","stack":["Error: socket hang up"," at connResetException (node:internal/errors:717:14)"," at TLSSocket.socketOnEnd (node:_http_client:526:23)"," at TLSSocket.emit (node:events:525:35)"," at TLSSocket.emit (node:domain:489:12)"," at endReadableNT (node:internal/streams/readable:1359:12)"," at process.processTicksAndRejections (node:internal/process/task_queues:82:21)"]} Unknown application error occurred Runtime.Unknown.
We think (but can’t verify) that this bug appeared when we updated to Next.js 13. However, none of our pages use the App Router; we’re still using the Pages Router for the time being. We’ve seen that rewrites can cause socket hang-ups, but as you can see in our next.config.js, we don’t use rewrites.
This can happen on SSG (Static Site Generation), SSR (Server-Side Rendering), or Client-side rendered pages. It can also happen on any browser or device.
Honestly, we have no clue or way of reproducing this problem because even in our development environment, we don’t encounter any problems.
Expected Behavior
I expect the application to work seamlessly without any errors or disruptions. Specifically, I anticipate that the mentioned “Socket Hang Up” error will not occur randomly in production mode on any route of the site. Additionally, I hope that better error handling mechanisms will be implemented to address any potential issues that may arise.
Which browser are you using? (if relevant)
No response
How are you deploying your application? (if relevant)
Vercel
About this issue
- Original URL
- State: closed
- Created a year ago
- Reactions: 37
- Comments: 71 (10 by maintainers)
I had the same issue with `next@13.4.x` + `next-auth@4.x.x`, deployed to ECS as a `standalone`-mode Docker image. The server crashed randomly with `Error: socket hang up`, even under super light load. But when running `next dev` locally, it was all normal.
I tried all different kinds of troubleshooting, including changing AWS health check settings, moving all API routes from the app dir to the pages dir, trying different next-auth versions, setting `httpAgentOptions.keepAlive` to false, switching the Docker base image from Alpine to Debian, etc. None of these fixed the issue.
Update 18 Aug
Please ignore my workaround above. I just found out that 13.3.4 is also broken; it just happened that it didn’t throw the error but crashed the server without printing any logs.
The true fix I have found so far is to run an x64 image; with it, the error is gone even under heavy load:
However, this is far from ideal, it takes way longer for my M chip MacBook to build, and I’ll have to migrate my AWS server architecture from ARM to x64 😢.
And one more thing: the current latest version, `13.4.18`, is still broken in standalone mode due to an env variables bug (see https://github.com/vercel/next.js/issues/53367), so I have to test with the last working version, `13.4.12`. While the PR https://github.com/vercel/next.js/pull/54203 to fix the env variable bug has been merged, it might take a while to be released on npm.
@ielaajezdev saw you mention you tried `linux/amd64` but did work, can you share more detail? Like how you build the image and how you trigger the error?
Update 19 Aug
Found this issue: https://github.com/prisma/prisma/issues/19419. I tried the workaround posted by @tigranbs: setting Prisma’s DB URL with `?connection_limit=1` fixed the error for me, but obviously it’s not the fix for this bug.
I also tried `prisma@4.16.2` and `prisma@5.1.1`, and got the error with both. But when I load test a route that doesn’t fetch data with Prisma, the error is gone.
So for me, the causes of the error can be narrowed down to:
next@13.4.x
+prisma@4
/prisma@5
@piotrcichosz @0xadada @mthmcalixto @renanrodrigueszup did your case follow this pattern?
Update 19 Aug
Tried the latest `next@13.4.19`; still getting the error under the conditions mentioned above, but the error message is different this time:
By running `netstat -tulpn | grep LISTEN` before the load test, the process on port `39679` is:
I would like to add my experience with this issue
So, I don’t think the issue is related to Prisma or NextAuth
In case anyone is using Sentry, our issue turned out to be related to a bug with the `@sentry/nextjs` package. Bumping it up a version has fixed the issue on our end.
https://github.com/orgs/vercel/discussions/3248#discussioncomment-7851868
I have just tried v14.0.4-canary.47 and the issue persists. I also tried Node.js v18 and v20.
We are only using the App Router. We do not use Prisma or NextAuth. This is affecting builds hosted on Vercel.com (including production).
It takes a little while for the issue to pop up after deploying, but after a few RSC renders, it happens quite often (~15% of the time).
Wow… this is really broad. Personally, I have a deployed project running `13.4.9` on bare Windows Server 2022 Datacenter, and it hasn’t had any of those errors. I did get the errors while developing it, but at the time I believed it was a one-time thing.
Also, 4 days ago on another project, I was messing around with the Next.js source code (in `node_modules` directly) to optimize the API compression mechanism, and I got the errors consistently. The first request completed/downloaded its content successfully, but I noticed the request was still going. I closed the tab and opened a new one, and it looked like the request just hung. The server was completely unresponsive to any requests.
In that experiment I don’t think the render worker process died; I believe it is something else. I will try to replicate it, but I don’t think I will make an issue out of it (because, in Next.js’s defence, I modified its source code). Though I hope I will come up with a theory that could explain the errors.
This error sometimes happens in developer mode in version 13.4.12, when there is a lot of refresh it stops working and needs to start the terminal again.
I’m also getting the error using next@13.4.19 and Prisma for an `app/api` route.
Same here on 13.4.12.
This just happened to me today using version 13.4.10. The app I’m working on is deployed in a Docker container on EKS. The funny thing is that moments before the container started logging the “Socket Hang Up” error, the ephemeral storage was nearly full, at up to 86% usage. Maybe it could be something related to the cache? I’m keeping an eye on it, trying to find a pattern.
Glad to see this thread since I am also experiencing frequent (seemingly random) “socket hang up errors” and it was quite hard to debug the root cause. I think my use case is different so I will add my error scenario just in case it makes the problem exploration easier.
Setup
`pages/*` and `pages/api/*` directories; not using the new App Router.
I am developing my Next app in a Docker Compose composition. The Next app runs in the Node 18 Alpine image as per the Docker examples provided. Other containers in the composition are a Postgres DB, Prisma (Studio), Cerbos and Strapi. I am developing on macOS (Mac with M2). I have not yet deployed my app to production and only use development mode currently.
Answers to your questions
Things I have tried
Using the `linux/amd64` platform for my Dockerfile: that did seem to resolve the issues, but it was too slow in the development environment on my M2 Mac.
Hope this helps! Following this issue and reverting to next 13.4.5 for now…
13.4.12 with the same problem man
Same issue here, happening very often, impossible to find where it comes from.
I am using “next”: “^14.0.4”, with nextAuth, and nextJS middleware (my app uses also Wundergraph/sdk)
Any update on this issue?
Thx a lot
“next”: “^14.0.3” the same issue when running custom server
In our case it is not random. There is a feed.xml route in our app and it sends a request to S3 to get the actual feed. But the file size is about ~100mb, it gives “socket hang up” error and then it breaks the entire app. No way to handle the error in try-catch.
I’ve tried almost everything to get rid of this error, including trying to use node https or other 3rd party http clients instead of fetch api. But it seems, the problem is not related to next, it’s a node.js or maybe undici related problem. Also tried some other node images but I think a stable node image could fix this issue.
We could “solve” or at least work around the socket hang up errors by replacing node with bun.
After updating to Prisma 5.3.0, the issue seems to be resolved (in development) so it might be that the issues are unrelated, but I will continue to monitor.
Currently I have this issue very often (almost every day). The situation is always the same: socket hang up, `next-render-worker-app` is missing from the process list, and the server doesn’t respond — it just hangs.
BTW, the Prisma maintainers said they fixed a related problem: https://github.com/prisma/prisma/issues/19419 – maybe this helps. I haven’t tried it yet since the fix is only in an unstable branch for now.
@mdluo not exactly, we don’t use prisma, and can see it occur after a single (non-concurrent) request.
@0xadada Interesting… It seems like your issue is a bit different but I think it correlates. I think this could be just the client aborting the connection and next.js still actively waiting for incoming TCP packets.
I’ll try to reproduce it by making a long request and abort it. See if I can get a reproduction.
We are also running into this issue, with the same circumstances as described before:
We will now downgrade to `13.4.5` and watch whether it happens again; unfortunately we have no way to test it consistently. Time will tell…
For info, we are running on AWS EC2 instances. @0xadada do I understand correctly that in your case the following requests do get handled? For us it seems to completely stop the server from being able to handle any requests after that point.