[Error 500] "Socket Hang Up" Randomly Occurring on any Routes in Production Mode #60148
Replies: 74 comments 6 replies
-
This issue will be easier to assess if you provide a simple project that reproduces it. Nevertheless, based on your stack trace, it looks like you are trying to connect to a TLS/SSL socket (which I doubt Next.js handles itself; it is probably one of your libraries). Based on your dependencies, my best guess is that you are somehow connecting to a database to authenticate a user. This is a wild guess, but I think the connection between your web server and your database is being closed (or is unstable, or anything in between). That is out of Next.js's scope, but as a quick fix you could check the connection to your database, or simply restart your Next.js server if possible, since that will re-instantiate the database variable and database connection.
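One common mitigation along the lines of the restart suggestion above is to cache the database connection and lazily recreate it when it has been closed. This is only a hedged sketch: `createConnection` and the `closed` flag are hypothetical stand-ins for whatever connect call and liveness signal your actual driver exposes.

```javascript
// Hypothetical stand-in for a real driver's connect call; the `closed`
// flag stands in for whatever liveness signal your driver exposes.
function createConnection() {
  return { closed: false, query: (sql) => `result of ${sql}` };
}

// Cache the connection on globalThis so it survives module re-evaluation,
// and reconnect only if the previous connection was closed.
function getDb() {
  if (!globalThis.__db || globalThis.__db.closed) {
    globalThis.__db = createConnection();
  }
  return globalThis.__db;
}
```

The point of the `globalThis` cache is that a healthy connection is reused across requests, while a dropped one is transparently replaced instead of poisoning every subsequent request.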
-
Another thing I overlooked: perhaps the socket hang-up is from the Vercel side? They handle HTTPS on their side, and if the client suddenly closes the connection while the request isn't complete, maybe it throws an error? But I don't think that's the case either; if it were, many Vercel users would have reported it already. Maybe you could provide a simple reproduction, and I'll see if I can reproduce it myself on Vercel.
-
We cannot recreate the issue with the provided information. Please add a reproduction in order for us to be able to investigate. Why was this issue marked with the
-
We're facing the same issue, also shortly after upgrading to Next 13. Logs are looking like this: and then thousands of errors: This is random but never on fresh instances; so far each time (we've had this problem 3 times) it occurred days after a deploy. Looks like once the socket breaks it can't be recreated? I saw @SebastienSusini is using Vercel; I'm using AWS ECS tasks.
-
Btw, I can't share the whole project, and since it can happen after a week and a million requests, I'm not sure how easy it would be to recreate. Maybe an easier route would be to enable some debug logs? @SebastienSusini are you getting heavy traffic on that project? Are you experiencing these errors only after some time has passed since deploy, or can they happen randomly a few minutes/hours after deploy? When did you upgrade to 13, and how many of these incidents have you had?
-
I'm also seeing this same error; it occurs on roughly 1 in 10 requests reliably in production, but I cannot reproduce it locally with
Notably, my app renders the page, but occasionally throws this error in
-
Seems possibly related to #49587
-
@SebastienSusini I also isolated this issue in my app to have started in next
-
13.4.12 with the same problem, man
-
We are also running into this issue, with the same circumstances as described before:

```
Error: socket hang up
    at connResetException (node:internal/errors:705:14)
    at Socket.socketOnEnd (node:_http_client:518:23)
    at Socket.emit (node:events:525:35)
    at Socket.emit (node:domain:489:12)
    at endReadableNT (node:internal/streams/readable:1358:12)
    at processTicksAndRejections (node:internal/process/task_queues:83:21) {
  code: 'ECONNRESET'
}
```

We will now downgrade to For info, we are running on AWS EC2 instances. @0xadada do I understand correctly that in your case subsequent requests do get handled? For us it seems to completely stop the server from being able to handle any requests after that point.
-
Socket hang-ups do occur from time to time if the client aborts the connection, and it seems that after the abort, Next.js is still actively waiting for incoming TCP packets. There are a few candidates for where this error could occur, but since it happens in production, where incoming traffic might be huge and really hard to reproduce at small scale, pinpointing the exact part is hard. Nonetheless, I have some rough ideas about where this problem (or problems) could be, based on the effects some of you have mentioned.
In both cases, Next.js uses http-proxy to forward requests between processes. I might write a proposal to rewrite the IPC communication between Next.js processes to handle requests better (support for IPC callbacks, passing the req/res pair to another process, etc.). @dbrxnds With the first point described, does the subsequent request after the error fail immediately, or is there a timeout before it fails? @0xadada I need to confirm: are you deploying this on your own machine or on shared hosting (Vercel, etc.)? Do you use appDir, pageDir, or a combination of both?
-
Appreciate the well-written response, @NadhifRadityo. I am fairly certain subsequent requests just hang, at least for a good while. We end up getting an error response saying "the upstream server returned an invalid response", but I assume that is just the load balancer or some other part doing its thing. Requests just remain pending in your network tab until that point.
-
This seems unlikely, but is there a chance your Next.js project does I/O operations synchronously, or runs heavy synchronous tasks? Also, I need to confirm: the request hangs for any route, right? (dynamic page, static page, static resources) And to make sure, can you do a process list with process arguments, before and after the error? Search for something like And for the record, do you use appDir only, pageDir, or a combination of both? I will try to eliminate IPC communication first, as it makes the most sense in my opinion. I'll try manually killing the worker process and see if I can reproduce the problem.
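On the synchronous-I/O question: one cheap way to check whether something is blocking the event loop (which would make every route hang at once, matching the symptoms) is to measure how late a zero-delay timer actually fires. A hedged sketch, with `blockFor` standing in for heavy synchronous work:

```javascript
// Schedule a 0 ms timer and report how late it actually fired; a large
// lag means the event loop was blocked and could not service sockets.
function measureEventLoopLag(report) {
  const start = Date.now();
  setTimeout(() => report(Date.now() - start), 0);
}

// Stand-in for heavy synchronous work (sync file I/O, a huge
// JSON.parse, a tight computation loop, etc.).
function blockFor(ms) {
  const end = Date.now() + ms;
  while (Date.now() < end) { /* busy-wait */ }
}
```

Logging this lag periodically in production (Node also ships `perf_hooks.monitorEventLoopDelay` for a production-grade version) would distinguish "the event loop is starved" from "a specific socket is broken".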
-
@NadhifRadityo yes, I've got the next.js process running in a shared Docker container with a Ruby webserver. The Ruby web client makes HTTP requests to our next process on localhost running
-
I faced the same issue when I tried to upgrade the Node.js version from 16 to 20, and it only occurs in the production environment.
However, after upgrading Next.js to the latest (14.0.3), it seems the issue is gone.
-
"next": "^14.0.3" the same issue when running custom server |
Beta Was this translation helpful? Give feedback.
-
I have just tried v14.0.4-canary.47 and the issue persists. I also tried Node.js v18 and v20. We are only using the App Router. We do not use Prisma or NextAuth. This is affecting builds hosted on Vercel.com (including production). It takes a little while for the issue to pop up after deploying, but after a few RSC renders, it happens quite often (~15% of the time).
-
Same issue here, happening very often; impossible to find where it comes from. I am using "next": "^14.0.4", with NextAuth and Next.js middleware (my app also uses Wundergraph/sdk). Any update on this issue? Thanks a lot.
-
In case anyone is using Sentry, our issue turned out to be related to a bug with the https://github.com/orgs/vercel/discussions/3248#discussioncomment-7851868
-
I am not using Sentry, but I would be very interested to understand which type of error from Sentry was solved. Indeed, I get the "socket hang up" error quite often but have found no way to track down the issue so far... Thanks. I am using Next.js 14.0.4, NextAuth with middleware, and Wundergraph as a backend.
-
Also have this exact problem, using Next.js 14.0.3. The error is not caught by our Next.js error boundary, and the user sees the default Vercel 500 error page (black background, white text). The site works after simply refreshing. My initial thought (before finding this thread) was: could this have something to do with cookies from Vercel preview deployments?
-
Also experiencing this issue. Node 18/20, Next 14.0.4, next-auth 4.24.5. Specifically, my
Not using Vercel; this is on a Windows Server 2022 VM and locally on my M1 Mac. It works fine running
I was seeing this intermittently, but over the last week something has changed and I am seeing it 100% of the time. Again, this all works perfectly when running Other issues I've found along the way that may be related:
-
Exact same issue on Node 18, Next 14.0.4, next-auth 4.24.5. In
Not sure how it's related, but the random freezes we experienced in production every few days are now completely gone (2 weeks in a row without this issue)! I'd guess a user occasionally got an unexpected error (from authenticating, in our case), which triggered this 500 error page, and since
-
Hello guys, if you're using Sentry and keep getting 500 errors, refer to this thread: Error 500. This fixed my random error 500.
-
Same thing here with
for any request that takes more than 30s to complete. It seems related to the previous issue: I can see that the remote server continues to process, eventually completes, and returns a correct response, not the 500 that Next.js is claiming. Is there any way to eliminate the timeout or customize it to a longer period?
-
Hi everyone, I will be moving this issue to our We encourage folks to file a new issue with a consistently reproducible Happy 2024!
-
Hello, I had the same issue: Why do I mention these dependencies? Because in my case, the error occurred when the user tried to log in. At that moment, the bcrypt.compare function was executed, and, for some strange reason, if the credentials were correct, the app would crash. I hope my case can help someone.
-
I have the following error:
after recently moving to Next v14 and using
-
Don't know if this will be useful for someone, but in my case I was wrongly calling a route segment from a layout. The call was being cached (revalidation was set to 10 minutes), so at build time we were getting the socket hang up error, and that remained the response until the revalidation period ended, at which point we would finally get the result of actually calling the external API.
-
Verify canary release
Provide environment information
```
Operating System:
  Platform: darwin
  Arch: x64
  Version: Darwin Kernel Version 21.6.0: Mon Aug 22 20:17:10 PDT 2022; root:xnu-8020.140.49~2/RELEASE_X86_64
Binaries:
  Node: 16.14.2
  npm: 8.5.0
  Yarn: 1.22.15
  pnpm: 6.11.0
Relevant packages:
  next: 13.4.6
  eslint-config-next: 13.4.2
  react: 18.2.0
  react-dom: 18.2.0
  typescript: 4.9.5
```
Which area(s) of Next.js are affected? (leave empty if unsure)
No response
Link to the code that reproduces this issue or a replay of the bug
Not possible (confidential).
To Reproduce
This is our package.json:
Our next.config.js:
Our middleware.ts:
Describe the Bug
We are experiencing a bug that occurs randomly for some of our users, only in production, on any route of the site, and it has never been reported on Sentry. We can only see it in the Vercel logs.
The full error message is as follows:
```
Uncaught Exception
{
  "errorType": "Error",
  "errorMessage": "socket hang up",
  "code": "ECONNRESET",
  "stack": [
    "Error: socket hang up",
    "    at connResetException (node:internal/errors:717:14)",
    "    at TLSSocket.socketOnEnd (node:_http_client:526:23)",
    "    at TLSSocket.emit (node:events:525:35)",
    "    at TLSSocket.emit (node:domain:489:12)",
    "    at endReadableNT (node:internal/streams/readable:1359:12)",
    "    at process.processTicksAndRejections (node:internal/process/task_queues:82:21)"
  ]
}
Unknown application error occurred
Runtime.Unknown
```
We think (but can't verify) that this bug appeared when we updated to Next.js 13. However, none of our pages use the App Router; we're still using the Pages Router for the time being. We've seen that rewrites can cause socket hang-ups, but as you can see in our next.config.js, we don't use rewrites.
This can happen on SSG (Static Site Generation), SSR (Server-Side Rendering), or Client-side rendered pages.
It can also happen on any browser or device.
Honestly, we have no clue or way of reproducing this problem because even in our development environment, we don't encounter any problems.
Expected Behavior
I expect the application to work seamlessly without any errors or disruptions. Specifically, I anticipate that the mentioned "Socket Hang Up" error will not occur randomly in production mode on any route of the site. Additionally, I hope that better error handling mechanisms will be implemented to address any potential issues that may arise.
Which browser are you using? (if relevant)
No response
How are you deploying your application? (if relevant)
Vercel