Posted by wmz on 2025-2-7 01:10:51

C++ deadlock debugging with gdb and pstack

  pstack

  The pstack command is a Linux tool for printing the stack traces of all threads in a running process.
  I wrote a server program that deadlocks, shown below:
  
#include <iostream>
#include <thread>
#include <mutex>
#include <chrono>
#include <sys/socket.h>
#include <netinet/in.h>
#include <unistd.h>
#include <cstring>

// Two mutexes used to construct the deadlock
std::mutex mutex1;
std::mutex mutex2;

// Handle a client request
void handle_client(int client_socket, int client_id) {
    std::cout << "Client " << client_id << ": Connected" << std::endl;

    // Simulate request handling
    if (client_id == 1) {
        // Client 1: lock mutex1 first, then mutex2
        std::cout << "Client " << client_id << ": Trying to lock mutex1..." << std::endl;
        std::lock_guard<std::mutex> lock1(mutex1);
        std::this_thread::sleep_for(std::chrono::seconds(5)); // hold the lock longer
        std::cout << "Client " << client_id << ": Locked mutex1, now trying to lock mutex2..." << std::endl;
        // Try to acquire mutex2
        std::lock_guard<std::mutex> lock2(mutex2); // deadlock happens here
        std::this_thread::sleep_for(std::chrono::seconds(5)); // simulate more work
        std::cout << "Client " << client_id << ": Locked both mutex1 and mutex2" << std::endl;
    } else if (client_id == 2) {
        // Client 2: lock mutex2 first, then mutex1
        std::cout << "Client " << client_id << ": Trying to lock mutex2..." << std::endl;
        std::lock_guard<std::mutex> lock2(mutex2);
        std::this_thread::sleep_for(std::chrono::seconds(5)); // hold the lock longer
        std::cout << "Client " << client_id << ": Locked mutex2, now trying to lock mutex1..." << std::endl;
        // Try to acquire mutex1
        std::lock_guard<std::mutex> lock1(mutex1); // deadlock happens here
        std::this_thread::sleep_for(std::chrono::seconds(5)); // simulate more work
        std::cout << "Client " << client_id << ": Locked both mutex1 and mutex2" << std::endl;
    }

    // Close the client connection
    close(client_socket);
    std::cout << "Client " << client_id << ": Disconnected" << std::endl;
}

// TCP server main loop
void start_server(int port) {
    int server_fd, new_socket;
    struct sockaddr_in address;
    int opt = 1;
    int addrlen = sizeof(address);

    // Create the listening socket
    if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) {
        perror("socket failed");
        exit(EXIT_FAILURE);
    }

    // Set the SO_REUSEADDR / SO_REUSEPORT options
    if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR | SO_REUSEPORT, &opt, sizeof(opt))) {
        perror("setsockopt failed");
        exit(EXIT_FAILURE);
    }

    // Bind the socket to the given port
    address.sin_family = AF_INET;
    address.sin_addr.s_addr = INADDR_ANY;
    address.sin_port = htons(port);
    if (bind(server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) {
        perror("bind failed");
        exit(EXIT_FAILURE);
    }

    // Listen for connections
    if (listen(server_fd, 3) < 0) {
        perror("listen failed");
        exit(EXIT_FAILURE);
    }
    std::cout << "Server started on port " << port << ". Waiting for connections..." << std::endl;

    int client_id = 1; // used to distinguish clients
    while (true) {
        // Accept a new client connection
        if ((new_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
            perror("accept failed");
            continue;
        }
        // Spawn a thread for each client
        std::thread client_thread(handle_client, new_socket, client_id++);
        client_thread.detach(); // detach so the thread runs independently
    }
}

int main() {
    int port = 8080;
    start_server(port);
    return 0;
}
tcp_deadlock_server.cpp
  Compile: g++ -std=c++11 -pthread -o tcp_deadlock_server tcp_deadlock_server.cpp -g
Connect twice with telnet (telnet 127.1 8080) and the server deadlocks. Its output:
Server started on port 8080. Waiting for connections...
Client 1: Connected
Client 1: Trying to lock mutex1...
Client 2: Connected
Client 2: Trying to lock mutex2...
Client 1: Locked mutex1, now trying to lock mutex2...
Client 2: Locked mutex2, now trying to lock mutex1...
  Debugging the deadlock with pstack

  Use ps to find the process ID, then run pstack <PID>. Here the output is redirected to a file to make it easier to read: pstack 915 > pstack_out
  
Thread 3 (LWP 919):
#0  0x0000fffcc23821dc in ?? () from /lib64/libpthread.so.0
#1  0x0000fffcc237b060 in pthread_mutex_lock () from /lib64/libpthread.so.0
#2  0x00000000004012c4 in __gthread_mutex_lock (__mutex=0x420240 <mutex1>) at /usr/include/c++/7.3.0/aarch64-linux-gnu/bits/gthr-default.h:748
#3  0x0000000000401a88 in std::mutex::lock (this=0x420240 <mutex1>) at /usr/include/c++/7.3.0/bits/std_mutex.h:103
#4  0x0000000000401b34 in std::lock_guard<std::mutex>::lock_guard (this=0xfffcc19ce810, __m=...) at /usr/include/c++/7.3.0/bits/std_mutex.h:162
#5  0x00000000004015ac in handle_client (client_socket=5, client_id=2) at tcp_deadlock_server.cpp:38
#6  0x0000000000402308 in std::__invoke_impl<void, void (*)(int, int), int, int> (__f=@0x248c23e0: 0x401310 <handle_client(int, int)>, __args#0=@0x248c23dc: 5, __args#1=@0x248c23d8: 2) at /usr/include/c++/7.3.0/bits/invoke.h:60
#7  0x0000000000401e18 in std::__invoke<void (*)(int, int), int, int> (__fn=@0x248c23e0: 0x401310 <handle_client(int, int)>, __args#0=@0x248c23dc: 5, __args#1=@0x248c23d8: 2) at /usr/include/c++/7.3.0/bits/invoke.h:95
#8  0x00000000004029cc in std::thread::_Invoker<std::tuple<void (*)(int, int), int, int> >::_M_invoke<0ul, 1ul, 2ul> (this=0x248c23d8) at /usr/include/c++/7.3.0/thread:234
#9  0x0000000000402970 in std::thread::_Invoker<std::tuple<void (*)(int, int), int, int> >::operator() (this=0x248c23d8) at /usr/include/c++/7.3.0/thread:243
#10 0x0000000000402950 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (*)(int, int), int, int> > >::_M_run (this=0x248c23d0) at /usr/include/c++/7.3.0/thread:186
#11 0x0000fffcc257e134 in ?? () from /lib64/libstdc++.so.6
#12 0x0000fffcc23788cc in ?? () from /lib64/libpthread.so.0
#13 0x0000fffcc22ba1ec in ?? () from /lib64/libc.so.6
Thread 2 (LWP 917):
#0  0x0000fffcc23821dc in ?? () from /lib64/libpthread.so.0
#1  0x0000fffcc237b060 in pthread_mutex_lock () from /lib64/libpthread.so.0
#2  0x00000000004012c4 in __gthread_mutex_lock (__mutex=0x420270 <mutex2>) at /usr/include/c++/7.3.0/aarch64-linux-gnu/bits/gthr-default.h:748
#3  0x0000000000401a88 in std::mutex::lock (this=0x420270 <mutex2>) at /usr/include/c++/7.3.0/bits/std_mutex.h:103
#4  0x0000000000401b34 in std::lock_guard<std::mutex>::lock_guard (this=0xfffcc21de820, __m=...) at /usr/include/c++/7.3.0/bits/std_mutex.h:162
#5  0x0000000000401450 in handle_client (client_socket=4, client_id=1) at tcp_deadlock_server.cpp:27
#6  0x0000000000402308 in std::__invoke_impl<void, void (*)(int, int), int, int> (__f=@0x248c2290: 0x401310 <handle_client(int, int)>, __args#0=@0x248c228c: 4, __args#1=@0x248c2288: 1) at /usr/include/c++/7.3.0/bits/invoke.h:60
#7  0x0000000000401e18 in std::__invoke<void (*)(int, int), int, int> (__fn=@0x248c2290: 0x401310 <handle_client(int, int)>, __args#0=@0x248c228c: 4, __args#1=@0x248c2288: 1) at /usr/include/c++/7.3.0/bits/invoke.h:95
#8  0x00000000004029cc in std::thread::_Invoker<std::tuple<void (*)(int, int), int, int> >::_M_invoke<0ul, 1ul, 2ul> (this=0x248c2288) at /usr/include/c++/7.3.0/thread:234
#9  0x0000000000402970 in std::thread::_Invoker<std::tuple<void (*)(int, int), int, int> >::operator() (this=0x248c2288) at /usr/include/c++/7.3.0/thread:243
#10 0x0000000000402950 in std::thread::_State_impl<std::thread::_Invoker<std::tuple<void (*)(int, int), int, int> > >::_M_run (this=0x248c2280) at /usr/include/c++/7.3.0/thread:186
#11 0x0000fffcc257e134 in ?? () from /lib64/libstdc++.so.6
#12 0x0000fffcc23788cc in ?? () from /lib64/libpthread.so.0
#13 0x0000fffcc22ba1ec in ?? () from /lib64/libc.so.6
Thread 1 (LWP 915):
#0  0x0000fffcc23827c4 in accept () from /lib64/libpthread.so.0
#1  0x0000000000401868 in start_server (port=8080) at tcp_deadlock_server.cpp:89
#2  0x00000000004018f8 in main () at tcp_deadlock_server.cpp:102
pstack output
  The output shows three threads. Thread 3 (LWP 919) is blocked in pthread_mutex_lock, with frame #5 at handle_client (client_socket=5, client_id=2) at tcp_deadlock_server.cpp:38, and Thread 2 (LWP 917) is blocked in pthread_mutex_lock, with frame #5 at handle_client (client_socket=4, client_id=1) at tcp_deadlock_server.cpp:27. Each thread is waiting on the mutex the other one already holds, so the deadlock and its exact source lines are identified.
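  Once the lock-order inversion is visible, one common fix (a suggested change, not part of the original post) is to acquire both mutexes through std::lock, which uses a deadlock-avoidance algorithm, and then adopt them into lock_guard objects. A minimal C++11 sketch, reusing the same mutex1/mutex2 names:

#include <mutex>

std::mutex mutex1;
std::mutex mutex2;

// Deadlock-free variant of the critical section: std::lock applies a
// deadlock-avoidance algorithm when taking both mutexes, so the two
// client threads can call this in any order without hanging.
void locked_work() {
    std::lock(mutex1, mutex2);                                   // take both locks, all or nothing
    std::lock_guard<std::mutex> lock1(mutex1, std::adopt_lock);  // adopt ownership; unlock at scope exit
    std::lock_guard<std::mutex> lock2(mutex2, std::adopt_lock);
    // ... work that needs both locks ...
}

  If both handle_client branches went through a helper like this, neither thread could end up holding one mutex while waiting for the other.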
  gdb

  I also wrote a version without C++11, using pthreads directly, as follows:
  
#include <iostream>
#include <cstring>
#include <unistd.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <errno.h>
#include <cstdlib>
#include <fcntl.h>
#include <stdio.h>

// Two mutexes used to construct the deadlock
pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex2 = PTHREAD_MUTEX_INITIALIZER;

// Handle a client request
void* handle_client(void* arg) {
    int client_socket = *(static_cast<int*>(arg));
    delete static_cast<int*>(arg); // release the descriptor storage (allocated with new in start_server, so delete, not free)
    std::cout << "Client connected with socket: " << client_socket << std::endl;

    // Simulate request handling
    if (client_socket == 4) {
        // Client 1: lock mutex1 first, then mutex2
        std::cout << "Client " << client_socket << ": Trying to lock mutex1..." << std::endl;
        pthread_mutex_lock(&mutex1);
        usleep(5000000); // sleep 5 seconds
        std::cout << "Client " << client_socket << ": Locked mutex1, now trying to lock mutex2..." << std::endl;
        // Try to acquire mutex2
        pthread_mutex_lock(&mutex2); // deadlock happens here
        usleep(5000000); // sleep 5 seconds
        std::cout << "Client " << client_socket << ": Locked both mutex1 and mutex2" << std::endl;
        // Release the mutexes
        pthread_mutex_unlock(&mutex2);
        pthread_mutex_unlock(&mutex1);
    } else if (client_socket == 5) {
        // Client 2: lock mutex2 first, then mutex1
        std::cout << "Client " << client_socket << ": Trying to lock mutex2..." << std::endl;
        pthread_mutex_lock(&mutex2);
        usleep(5000000); // sleep 5 seconds
        std::cout << "Client " << client_socket << ": Locked mutex2, now trying to lock mutex1..." << std::endl;
        // Try to acquire mutex1
        pthread_mutex_lock(&mutex1); // deadlock happens here
        usleep(5000000); // sleep 5 seconds
        std::cout << "Client " << client_socket << ": Locked both mutex1 and mutex2" << std::endl;
        // Release the mutexes
        pthread_mutex_unlock(&mutex1);
        pthread_mutex_unlock(&mutex2);
    }

    // Close the client connection
    close(client_socket);
    std::cout << "Client disconnected with socket: " << client_socket << std::endl;
    pthread_exit(NULL);
}

// TCP server main loop
void start_server(int port) {
    int server_fd, new_socket;
    struct sockaddr_in address;
    int opt = 1;
    int addrlen = sizeof(address);

    // Create the listening socket
    if ((server_fd = socket(AF_INET, SOCK_STREAM, 0)) == 0) {
        perror("socket failed");
        exit(EXIT_FAILURE);
    }

    // Set the SO_REUSEADDR option
    if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
        perror("setsockopt failed");
        exit(EXIT_FAILURE);
    }

    // Bind the socket to the given port
    address.sin_family = AF_INET;
    address.sin_addr.s_addr = INADDR_ANY;
    address.sin_port = htons(port);
    if (bind(server_fd, (struct sockaddr *)&address, sizeof(address)) < 0) {
        perror("bind failed");
        exit(EXIT_FAILURE);
    }

    // Listen for connections
    if (listen(server_fd, 3) < 0) {
        perror("listen failed");
        exit(EXIT_FAILURE);
    }
    std::cout << "Server started on port " << port << ". Waiting for connections..." << std::endl;

    int client_id = 1; // used to distinguish clients
    while (true) {
        // Accept a new client connection
        if ((new_socket = accept(server_fd, (struct sockaddr *)&address, (socklen_t*)&addrlen)) < 0) {
            perror("accept failed");
            continue;
        }
        // Spawn a thread for each client
        pthread_t thread;
        int* client_socket_ptr = new int(new_socket); // heap-allocate storage for the socket descriptor
        if (pthread_create(&thread, NULL, handle_client, static_cast<void*>(client_socket_ptr)) != 0) {
            perror("pthread_create failed");
            delete client_socket_ptr; // free the memory if thread creation failed
            close(new_socket);
            continue;
        }
        // Detach so the thread runs independently
        pthread_detach(thread);
        // For the deadlock test, only the first two client connections are handled
        if (client_id >= 3) {
            close(new_socket); // close extra connections
            continue;
        }
        client_id++;
    }
}

int main() {
    int port = 8080;
    start_server(port);
    return 0;
}
tcp_deadlock_server_c++0x.cpp
  Compile, run, and test with telnet the same way as above.
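  Before attaching gdb, note that the pthread version admits the same kind of fix as the C++11 one. A minimal sketch (an illustrative addition, not part of the original program) that enforces a single global lock order through helper functions:

#include <pthread.h>

pthread_mutex_t mutex1 = PTHREAD_MUTEX_INITIALIZER;
pthread_mutex_t mutex2 = PTHREAD_MUTEX_INITIALIZER;

// Every code path that needs both locks goes through these helpers,
// so mutex1 is always taken before mutex2 regardless of the caller,
// and the lock-order inversion cannot occur.
void lock_both() {
    pthread_mutex_lock(&mutex1);
    pthread_mutex_lock(&mutex2);
}

void unlock_both() {
    pthread_mutex_unlock(&mutex2);  // release in reverse order
    pthread_mutex_unlock(&mutex1);
}

  The point of debugging, though, is finding the inversion in the first place, which is what gdb shows next.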
  Debugging the deadlock with gdb

  Use ps to find the process ID, then attach gdb to it: gdb -p 11560
  List all threads with info threads, switch to one with thread 2, then run bt to see that thread's stack; switch to the other thread the same way. Both threads turn out to be stuck in a lock call. The session:
  
(gdb) info threads
  3 Thread 0x7fb6c0115700 (LWP 11562)  0x0000003b5200dff4 in __lll_lock_wait () from /lib64/libpthread.so.0
  2 Thread 0x7fb6bf714700 (LWP 11564)  0x0000003b5200dff4 in __lll_lock_wait () from /lib64/libpthread.so.0
* 1 Thread 0x7fb6c0117720 (LWP 11560)  0x0000003b5200e7ed in accept () from /lib64/libpthread.so.0
(gdb) thread 2
[Switching to thread 2 (Thread 0x7fb6bf714700 (LWP 11564))]
#0  0x0000003b5200dff4 in __lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
#0  0x0000003b5200dff4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x0000003b52009328 in _L_lock_854 () from /lib64/libpthread.so.0
#2  0x0000003b520091f7 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x0000000000400f9f in handle_client (arg=0x13a8010) at tcp_deadlock_server.cpp:48
#4  0x0000003b520077f1 in start_thread () from /lib64/libpthread.so.0
#5  0x0000003b51ce570d in clone () from /lib64/libc.so.6
(gdb) thread 3
[Switching to thread 3 (Thread 0x7fb6c0115700 (LWP 11562))]
#0  0x0000003b5200dff4 in __lll_lock_wait () from /lib64/libpthread.so.0
(gdb) bt
#0  0x0000003b5200dff4 in __lll_lock_wait () from /lib64/libpthread.so.0
#1  0x0000003b52009328 in _L_lock_854 () from /lib64/libpthread.so.0
#2  0x0000003b520091f7 in pthread_mutex_lock () from /lib64/libpthread.so.0
#3  0x0000000000400eb2 in handle_client (arg=0x13a8010) at tcp_deadlock_server.cpp:33
#4  0x0000003b520077f1 in start_thread () from /lib64/libpthread.so.0
#5  0x0000003b51ce570d in clone () from /lib64/libc.so.6
gdb deadlock debugging session
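  Both backtraces end in pthread_mutex_lock inside handle_client, one at tcp_deadlock_server.cpp:48 and one at line 33, which confirms the lock-order inversion. As a complementary, program-side technique (an assumption added here, not something the original post does), the blocking pthread_mutex_lock calls could be replaced with pthread_mutex_timedlock so that a suspected deadlock is reported at runtime instead of only showing up when someone attaches a debugger. A minimal sketch:

#include <pthread.h>
#include <time.h>
#include <stdio.h>

// Try to take a mutex, but give up and report after timeout_sec seconds.
// Returns 0 on success, an errno value (typically ETIMEDOUT) on failure.
int lock_or_report(pthread_mutex_t* m, const char* name, int timeout_sec) {
    struct timespec deadline;
    clock_gettime(CLOCK_REALTIME, &deadline);  // timedlock takes an absolute CLOCK_REALTIME deadline
    deadline.tv_sec += timeout_sec;
    int rc = pthread_mutex_timedlock(m, &deadline);
    if (rc != 0) {
        fprintf(stderr, "possible deadlock: could not lock %s within %d s (rc=%d)\n",
                name, timeout_sec, rc);
    }
    return rc;
}

  handle_client could then call, for example, lock_or_report(&mutex2, "mutex2", 10) for its second lock and back out (unlocking the first mutex) on failure, which avoids the permanent hang and logs where it happened.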
  Summary

  Both pstack and gdb use the ptrace() system call to attach to the target process. ptrace() lets the debugger pause the target process, read and modify its memory and registers, and intercept its system calls.
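  To make that concrete, here is a minimal sketch (for illustration only, not how either tool is actually implemented) that attaches to a PID with PTRACE_ATTACH, waits for the target to stop, and then detaches; pstack and gdb build their stack reading and symbol resolution on top of exactly this kind of attach:

#include <sys/ptrace.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <cstdio>
#include <cstdlib>

int main(int argc, char** argv) {
    if (argc != 2) {
        std::fprintf(stderr, "usage: %s <pid>\n", argv[0]);
        return 1;
    }
    pid_t pid = static_cast<pid_t>(std::atoi(argv[1]));

    // Attach: the kernel stops the target and makes it traceable by us.
    if (ptrace(PTRACE_ATTACH, pid, nullptr, nullptr) == -1) {
        std::perror("PTRACE_ATTACH");
        return 1;
    }
    int status = 0;
    waitpid(pid, &status, 0);  // wait until the target has actually stopped
    std::printf("attached to %d; a debugger would now read its registers and memory\n",
                static_cast<int>(pid));

    // Detach: the target resumes running.
    ptrace(PTRACE_DETACH, pid, nullptr, nullptr);
    return 0;
}

  Run it with sufficient privileges against the deadlocked server's PID; while it is attached, the server is stopped, just as it is while pstack or gdb inspects it.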