我有一个程序,它从文件中读取一个域名列表,先执行异步 DNS 解析,然后使用异步 epoll 循环下载每个域名的首页。
该程序在数千次迭代中运行良好,随后因 *** buffer overflow detected ***: terminated 错误而中止。下面是回溯:
Program received signal SIGABRT, Aborted.
__pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
44 pthread_kill.c: No such file or directory.
(gdb) bt
#0 __pthread_kill_implementation (no_tid=0, signo=6, threadid=140737351415616) at pthread_kill.c:44
#1 __pthread_kill_internal (signo=6, threadid=140737351415616) at pthread_kill.c:80
#2 __GI___pthread_kill (threadid=140737351415616, signo=signo@entry=6) at pthread_kill.c:91
#3 0x00007ffff7db0476 in __GI_raise (sig=sig@entry=6) at ../sysdeps/posix/raise.c:26
#4 0x00007ffff7d967b7 in __GI_abort () at abort.c:79
#5 0x00007ffff7df75e6 in __libc_message (action=action@entry=do_abort, fmt=fmt@entry=0x7ffff7f48ef4 "*** %s ***: terminated\n") at ../sysdeps/posix/libc_fatal.c:155
#6 0x00007ffff7ea322a in __GI___fortify_fail (msg=msg@entry=0x7ffff7f48e9a "buffer overflow detected") at fortify_fail.c:26
#7 0x00007ffff7ea1b46 in __GI___chk_fail () at chk_fail.c:28
#8 0x00007ffff7ea316b in __fdelt_chk (d=<optimised out>) at fdelt_chk.c:25
#9 0x00007ffff7f97362 in ares_fds () from /lib/x86_64-linux-gnu/libcares.so.2
#10 0x000055555555682d in wait_ares (channel=0x555556bb32a0) at epoll_recv_with_async_dns.c:80
#11 0x000055555555773c in main (argc=2, argv=0x7fffffffe0a8) at epoll_recv_with_async_dns.c:303
如您所见,回溯指向对 ares_fds 的调用。出问题的代码行是:
nfds = ares_fds(channel, &read_fds, &write_fds);
我看不出这一行代码怎么会发生缓冲区溢出。有什么思路可以让我进一步调试,从而找到并解决问题吗?感兴趣的话,下面是一个最小可复现示例:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <resolv.h>
#include <sys/epoll.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <time.h>
#include <ares.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define DNSTIMEOUT 3000 /* Max. number of ms for first try */
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
/* Number of slots in the resolved[]/ips[] ring that callback() fills. */
#define MAXDOMAINS 8192
/* Number of TCP sockets main() opens; also the lookup count main() waits for. */
#define MAX_CONNECTIONS 8192
#define TIMEOUT 10000
int epfd;
/* Descriptors returned by socket() in main(), one per connection slot. */
int sockfd[MAX_CONNECTIONS];
struct epoll_event event[MAX_CONNECTIONS];
struct sockaddr_in dest[MAX_CONNECTIONS];
/* Host name and textual address of each successful lookup, written by callback(). */
char resolved[MAXDOMAINS][254];
char ips[MAXDOMAINS][128];
/* current: next slot callback() writes; active: successful lookups counted so far. */
int current = 0, active = 0, next = 0;
char servers[MAX_CONNECTIONS][128];
char domains[MAX_CONNECTIONS][254];
/* i is main()'s loop counter; count is incremented once per socket opened in main(). */
int i, num_ready, connections = 0, done = 0, total_bytes = 0, total_domains = 0, iterations = 0, count = 0;
/* Lookups issued but not yet completed: main() increments, callback() decrements. */
static int nwaiting;
/*
 * Placeholder hook taking a user pointer, a socket descriptor and read/write
 * flags. It deliberately does nothing; the visible code never registers it.
 * A debug print of (s, read, write) can be dropped in here when tracing.
 */
static void state_cb(void *data, int s, int read, int write)
{
    /* All arguments are ignored on purpose. */
    (void)data;
    (void)s;
    (void)read;
    (void)write;
}
/*
 * Completion callback handed to ares_gethostbyname().
 *
 * Always decrements nwaiting. When the lookup succeeded and returned at least
 * one address, stores host->h_name in resolved[current] and the textual form
 * of the first address in ips[current], advances `current` (wrapping to 0
 * after MAXDOMAINS - 1), increments `active` and prints the running total.
 *
 * arg and timeouts are not used.
 */
static void callback(void *arg, int status, int timeouts, struct hostent *host)
{
    char ip[INET6_ADDRSTRLEN];

    (void)arg;
    (void)timeouts;

    nwaiting--;
    if (!host || status != ARES_SUCCESS) {
        return;
    }
    if (host->h_addr_list[0] == NULL) {
        return;
    }
    /* On failure inet_ntop() leaves ip unspecified, so there is nothing to store. */
    if (inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip)) == NULL) {
        return;
    }
    /* Bounded copies: h_name comes from a DNS reply and must not overrun its slot. */
    snprintf(resolved[current], sizeof(resolved[current]), "%s", host->h_name);
    snprintf(ips[current], sizeof(ips[current]), "%s", ip);
    current = (current < MAXDOMAINS - 1) ? current + 1 : 0;
    active++;
    printf("active %d\r", active);
}
/*
 * Run one select()-based processing step on the c-ares channel.
 *
 * Asks c-ares for its descriptors, waits on them for at most the interval
 * ares_timeout() reports, then lets c-ares process whatever became ready.
 * Returns immediately when c-ares has no descriptors to watch.
 *
 * WARNING: an fd_set can only describe descriptors below FD_SETSIZE. Once the
 * process holds more open descriptors than that (main() opens
 * MAX_CONNECTIONS sockets), ares_fds() trips glibc's fortified FD_SET check
 * (__fdelt_chk) and the program aborts with "buffer overflow detected".
 * Avoiding that requires ares_getsock() with poll()/epoll instead of select().
 */
static void wait_ares(ares_channel channel)
{
    fd_set readers, writers;
    struct timeval tv;

    FD_ZERO(&readers);
    FD_ZERO(&writers);

    int maxfd = ares_fds(channel, &readers, &writers);
    if (maxfd <= 0) {
        return;
    }

    struct timeval *wait_for = ares_timeout(channel, NULL, &tv);
    select(maxfd, &readers, &writers, NULL, wait_for);
    ares_process(channel, &readers, &writers);
}
/*
 * Reproducer entry point.
 *
 * Usage: <program> <file>, where <file> holds whitespace-separated domain names.
 *
 *  1. Initialises c-ares and points it at the SERVERS resolvers.
 *  2. Issues lookups, pausing in wait_ares() whenever MAXWAITING are
 *     outstanding, until callback() has counted MAX_CONNECTIONS successes.
 *  3. Opens MAX_CONNECTIONS non-blocking TCP sockets.
 *  4. Keeps issuing and draining lookups while any are outstanding.
 *
 * Returns 0 after cleanup, 1 when c-ares setup fails, and exits with a
 * failure status when the argument is missing or the file cannot be opened.
 */
int main(int argc, char *argv[]) {
    ares_channel channel;
    char domain[128];
    int status, dns_done = 0;
    int optmask;
    FILE *fp;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <domain-list-file>\n", argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    /* Ignore SIGPIPE so a write to a closed peer cannot kill the process. */
    sigaction(SIGPIPE, &(struct sigaction){SIG_IGN}, NULL);

    status = ares_library_init(ARES_LIB_INIT_ALL);
    if (status != ARES_SUCCESS) {
        printf("ares_library_init: %s\n", ares_strerror(status));
        return 1;
    }

    struct ares_options options = {
        .timeout = DNSTIMEOUT, /* set first query timeout */
        .tries = MAXTRIES /* set max. number of tries */
    };
    optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;
    status = ares_init_options(&channel, &options, optmask);
    if (status != ARES_SUCCESS) {
        printf("ares_init_options: %s\n", ares_strerror(status));
        return 1;
    }

    status = ares_set_servers_csv(channel, SERVERS);
    if (status != ARES_SUCCESS) {
        printf("ares_set_servers_csv: %s\n", ares_strerror(status));
        return 1;
    }

    fp = fopen(argv[1], "r");
    if (!fp) {
        perror(argv[1]);
        exit(EXIT_FAILURE);
    }

    /* Resolve until MAX_CONNECTIONS lookups have succeeded. */
    do {
        if (nwaiting >= MAXWAITING || dns_done) {
            do {
                wait_ares(channel);
            } while (nwaiting > MAXWAITING);
        }
        if (!dns_done) {
            /* Width is sizeof(domain) - 1 so the terminating NUL always fits. */
            if (fscanf(fp, "%127s", domain) == 1) {
                ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                nwaiting++;
            } else {
                dns_done = 1;
            }
        }
    } while (active < MAX_CONNECTIONS);

    /*---Open sockets for streaming---*/
    for (i = 0; i < MAX_CONNECTIONS; i++) {
        if ((sockfd[i] = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0)) < 0) {
            perror("Socket");
            exit(errno);
        }
        count++;
    }

    /* NOTE(review): `done` is never set in this listing, so this loop never exits. */
    while (1) {
        /*---Do async DNS---*/
        while (nwaiting > 0) {
            if (nwaiting >= MAXWAITING || dns_done) {
                do {
                    wait_ares(channel);
                } while (nwaiting > MAXWAITING);
            }
            if (!dns_done) {
                if (fscanf(fp, "%127s", domain) == 1) {
                    ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                    nwaiting++;
                } else {
                    dns_done = 1;
                }
            }
        }
        if (done && count == 0) {
            break;
        }
    }

    ares_destroy(channel);
    ares_library_cleanup();
    fclose(fp);
    printf("\nFinished without errors\n");
    return 0;
}
如果我注释掉创建套接字的部分,则不会发生中止:
/*---Open sockets for streaming---*/
/* One non-blocking TCP socket per connection slot; any failure ends the program. */
for (i = 0; i < MAX_CONNECTIONS; i++)
{
if ( (sockfd[i] = socket(AF_INET, SOCK_STREAM|SOCK_NONBLOCK, 0)) < 0 ) {
perror("Socket");
exit(errno);
}
/* count tracks how many sockets have been opened. */
count++;
}
因此,不管问题是什么,这都与我有许多套接字文件描述符有关。有什么想法吗?
进一步编辑:
进一步的调试似乎表明问题与打开的套接字数量有关。如果我将创建的套接字数量减少到1017,则中止将不再发生。如果我创建了1018个套接字,程序就会中止。
发布于 2022-01-03 22:19:13
看起来这可能是根本原因:
https://c-ares.org/mail/c-ares-archive-2017-08/0002.shtml
邮件中引用的堆栈跟踪如下(原文在转载时已损坏,无法恢复的部分以 … 标出):
> (gdb) bt
> #0 0x00007f959c01ac37 in __GI_raise (sig=sig@entry=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:56
> #1 0x00007f959c01e028 in __GI_abort () at …
> #2 … in __libc_message (…, fmt=fmt@entry=0x7f959c166d70 "*** %s ***: %s terminated\n") at ../sysdeps/posix/libc_fatal.c:175
> #3 0x00007f959c0f283c in __GI___fortify_fail (msg=<optimized out>, msg@entry=0x7f959c166d07 "buffer overflow detected") at fortify_fail.c:38
> #4 0x00007f959c0f1710 in __GI___chk_fail () at chk_fail.c:28
> #5 0x00007f959c0f2787 in __fdelt_chk (d=<optimized out>) at fdelt_chk.c:25
> #6 0x00007f959c6b69ad in ares_fds () from …
> #7 0x000000000040b448 in … () at /home/necs/dev/apat/source/recorder/recdns.c:157
> #8 0x000000004052f2 in rec_main_thread (data=0x0) at …
> #9 0x00000000403de1 in main (argc=7, …) at /home/necs/dev/apat/source/recorder/main.c:129
你要么超过了FD_SETSIZE的限制,要么有负数的fds。Glibc在内部检查这一点,如果检查失败,将导致崩溃:https://github.com/lattera/glibc/blob/master/debug/fdelt_chk.c
丹尼尔在2017-08-01上收到的
由于我不确定您在哪个平台上,所以在调用ares_fds()之前,我不能推荐一种检查值的好方法,只能跟踪之前的nfds (故障前的返回值)。
发布于 2022-01-04 17:04:06
正如 paulsm4 所回答的,问题与打开的套接字描述符数量有关。为了解决这个问题,我重新实现了程序,改用 epoll 而不是 select,从而绕过了这个问题。完整代码清单如下:
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/socket.h>
#include <resolv.h>
#include <sys/epoll.h>
#include <arpa/inet.h>
#include <unistd.h>
#include <time.h>
#include <ares.h>
#include <netinet/in.h>
#include <netdb.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <errno.h>
#define MAXWAITING 1000 /* Max. number of parallel DNS queries */
#define MAXTRIES 3 /* Max. number of tries per domain */
#define DNSTIMEOUT 3000 /* Max. number of ms for first try */
/* Capacity of dns_events[], the array wait_ares() hands to epoll_wait(). */
#define DNS_MAX_EVENTS 10000
/* How many dns_client_fds[] slots wait_ares() requests from ares_getsock() and scans. */
#define DNS_MAX_SERVERS 2
#define SERVERS "1.0.0.1,8.8.8.8" /* DNS server to use (Cloudflare & Google) */
/* Number of slots in the resolved[]/ips[] ring that callback() fills. */
#define MAXDOMAINS 8192
/* TCP port every connection targets. */
#define PORT 80
/* Size in bytes of each per-connection receive buffer. */
#define MAXBUF 1024
/* Capacity of events[], the array wait() hands to epoll_wait(). */
#define MAX_EPOLL_EVENTS 8192
/* Number of simultaneous HTTP connection slots. */
#define MAX_CONNECTIONS 8192
/* Timeout argument wait() passes to epoll_wait(). */
#define TIMEOUT 10000
/* Descriptors most recently reported by ares_getsock(); rebuilt on every wait_ares() call. */
ares_socket_t dns_client_fds[ARES_GETSOCK_MAXNUM] = {0};
/* ev: scratch registration record; dns_events: results of the DNS epoll_wait(). */
struct epoll_event ev, dns_events[DNS_MAX_EVENTS];
/* Scratch state of wait_ares(); epollfd is the epoll instance used for the DNS sockets. */
int i,bitmask,nfds, epollfd, timeout, fd_count, ret;
/* epoll instance used for the HTTP sockets. */
int epfd;
/* Per-slot socket descriptor, epoll registration and destination address. */
int sockfd[MAX_CONNECTIONS];
struct epoll_event event[MAX_CONNECTIONS];
struct sockaddr_in dest[MAX_CONNECTIONS];
/* Host name and textual address of each successful lookup, written by callback(). */
char resolved[MAXDOMAINS][254];
char ips[MAXDOMAINS][128];
/* current: next ring slot callback() writes; next: next ring slot handed to a connection. */
int current = 0, active = 0, next = 0;
/* Address and host name currently assigned to each connection slot. */
char servers[MAX_CONNECTIONS][128];
char domains[MAX_CONNECTIONS][254];
/* Per-slot HTTP request text, receive buffer, and count of request bytes already sent. */
char get_buffer[MAX_CONNECTIONS][1024];
char buffer[MAX_CONNECTIONS][MAXBUF];
int buffer_used[MAX_CONNECTIONS];
/* Timestamps get_time() uses to compute elapsed time. */
struct timespec startTime, stopTime;
/* Loop counter (a second tentative definition of i) and statistics printed by get_time(). */
int i, num_ready, connections = 0, done = 0, total_bytes = 0, total_domains = 0, iterations = 0, count = 0;
/* NOTE: main() declares its own local fp that hides this one. */
FILE * fp;
/* Results of the HTTP epoll_wait() performed by wait(). */
struct epoll_event events[MAX_EPOLL_EVENTS];
/* Lookups issued but not yet completed: main() increments, callback() decrements. */
static int nwaiting;
/*
 * Placeholder hook taking a user pointer, a socket descriptor and read/write
 * flags. It deliberately does nothing; the visible code never registers it.
 * A debug print of (s, read, write) can be dropped in here when tracing.
 */
static void state_cb(void *data, int s, int read, int write)
{
    /* All arguments are ignored on purpose. */
    (void)data;
    (void)s;
    (void)read;
    (void)write;
}
/*
 * Completion callback handed to ares_gethostbyname().
 *
 * Always decrements nwaiting. When the lookup succeeded and returned at least
 * one address, stores host->h_name in resolved[current] and the textual form
 * of the first address in ips[current], advances `current` (wrapping to 0
 * after MAXDOMAINS - 1), increments `active` and prints the running total.
 *
 * arg and timeouts are not used.
 */
static void callback(void *arg, int status, int timeouts, struct hostent *host)
{
    char ip[INET6_ADDRSTRLEN];

    (void)arg;
    (void)timeouts;

    nwaiting--;
    if (!host || status != ARES_SUCCESS) {
        return;
    }
    if (host->h_addr_list[0] == NULL) {
        return;
    }
    /* On failure inet_ntop() leaves ip unspecified, so there is nothing to store. */
    if (inet_ntop(host->h_addrtype, host->h_addr_list[0], ip, sizeof(ip)) == NULL) {
        return;
    }
    /* Bounded copies: h_name comes from a DNS reply and must not overrun its slot. */
    snprintf(resolved[current], sizeof(resolved[current]), "%s", host->h_name);
    snprintf(ips[current], sizeof(ips[current]), "%s", ip);
    current = (current < MAXDOMAINS - 1) ? current + 1 : 0;
    active++;
    printf("active %d\r", active);
}
/*
 * Run one epoll-based processing step on the c-ares channel.
 *
 *  - Removes the descriptors registered by the previous call from epollfd.
 *  - Fetches up to DNS_MAX_SERVERS descriptors from ares_getsock() and
 *    registers each for the readiness c-ares asked for.
 *  - Waits up to one second, then passes every ready descriptor to
 *    ares_process_fd(); if nothing became ready, calls it with
 *    ARES_SOCKET_BAD for both descriptors.
 *
 * Returns early when no descriptor could be registered or epoll_wait() fails.
 * Uses the file-scope scratch variables i, nfds, bitmask, ev, timeout and
 * fd_count, so their values change on every call.
 */
static void wait_ares(ares_channel channel)
{
    nfds = 0;
    bitmask = 0;

    /* Drop last round's registrations; a failing removal is simply ignored. */
    for (i = 0; i < DNS_MAX_SERVERS; i++) {
        if (dns_client_fds[i] > 0) {
            epoll_ctl(epollfd, EPOLL_CTL_DEL, dns_client_fds[i], NULL);
        }
    }

    memset(dns_client_fds, 0, sizeof(dns_client_fds));
    bitmask = ares_getsock(channel, dns_client_fds, DNS_MAX_SERVERS);

    for (i = 0; i < DNS_MAX_SERVERS; i++) {
        if (dns_client_fds[i] <= 0) {
            continue;
        }
        ev.events = 0;
        if (ARES_GETSOCK_READABLE(bitmask, i)) {
            ev.events |= EPOLLIN;
        }
        if (ARES_GETSOCK_WRITABLE(bitmask, i)) {
            ev.events |= EPOLLOUT;
        }
        ev.data.fd = dns_client_fds[i];
        /* An already-registered descriptor (EEXIST) still counts as watched. */
        if (epoll_ctl(epollfd, EPOLL_CTL_ADD, dns_client_fds[i], &ev) < 0 && errno != EEXIST) {
            continue;
        }
        nfds++;
    }

    if (nfds == 0) {
        return;
    }

    timeout = 1000; /* milliseconds */
    fd_count = epoll_wait(epollfd, dns_events, DNS_MAX_EVENTS, timeout);
    if (fd_count < 0) {
        return;
    }
    if (fd_count == 0) {
        /* Nothing ready: still call into c-ares with no descriptors. */
        ares_process_fd(channel, ARES_SOCKET_BAD, ARES_SOCKET_BAD);
        return;
    }

    for (i = 0; i < fd_count; ++i) {
        ares_socket_t readable = (dns_events[i].events & EPOLLIN) ? dns_events[i].data.fd : ARES_SOCKET_BAD;
        ares_socket_t writable = (dns_events[i].events & EPOLLOUT) ? dns_events[i].data.fd : ARES_SOCKET_BAD;
        ares_process_fd(channel, readable, writable);
    }
}
/*
 * Open a fresh non-blocking TCP socket for connection slot `sock`, register
 * it with epfd for EPOLLIN|EPOLLOUT, and start connecting to servers[sock]
 * on PORT.
 *
 * Increments `count` and resets buffer_used[sock] so the request for the new
 * connection is sent from its first byte. Terminates the program when the
 * socket cannot be created or servers[sock] is not a valid IPv4 address.
 * A connect() failure other than EINPROGRESS is only reported.
 */
void make_socket_and_connect (int sock)
{
    sockfd[sock] = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
    if (sockfd[sock] < 0) {
        int saved_errno = errno; /* perror() may overwrite errno */
        perror("Socket");
        exit(saved_errno);
    }
    count++;

    event[sock].events = EPOLLIN | EPOLLOUT;
    event[sock].data.fd = sockfd[sock];
    if (epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd[sock], &event[sock]) < 0) {
        perror("epoll_ctl");
    }

    memset(&dest[sock], 0, sizeof(dest[sock]));
    dest[sock].sin_family = AF_INET;
    dest[sock].sin_port = htons(PORT);

    /* inet_pton() returns 0 for a malformed address without setting errno. */
    int rc = inet_pton(AF_INET, servers[sock], &dest[sock].sin_addr.s_addr);
    if (rc != 1) {
        fprintf(stderr, "\n%s: %s\n", servers[sock],
                rc == 0 ? "not a valid IPv4 address" : strerror(errno));
        exit(EXIT_FAILURE);
    }

    /* Reset unconditionally: connect() may also succeed immediately. */
    buffer_used[sock] = 0;

    if (connect(sockfd[sock], (struct sockaddr*)&dest[sock], sizeof(dest[sock])) != 0
        && errno != EINPROGRESS) {
        printf("%s\n", servers[sock]);
        perror("Connect again ");
    }
}
/*
 * Return 0 when `domain` is exactly one of three rejected address strings,
 * otherwise 1. Despite the name, no syntax validation is performed: any
 * other string, including an empty one, is accepted.
 */
int is_valid_ip(char *domain)
{
    static const char *const rejected[] = {
        "255.255.255.255",
        "192.168.1.0",
        "127.0.0.0",
    };

    for (size_t k = 0; k < sizeof rejected / sizeof rejected[0]; k++) {
        if (strcmp(domain, rejected[k]) == 0) {
            return 0;
        }
    }
    return 1;
}
/*
 * Release the socket held by connection slot `socket`: remove it from epfd,
 * close it, and decrement `count`.
 */
void close_socket (int socket)
{
    /*
     * Deregister before closing. Once close() has run, the descriptor number
     * is invalid (or already reused), so a later EPOLL_CTL_DEL would fail with
     * EBADF or hit an unrelated descriptor.
     */
    epoll_ctl(epfd, EPOLL_CTL_DEL, sockfd[socket], &event[socket]);
    close(sockfd[socket]);
    count--;
}
/*
 * Assign the next resolved host to connection slot `id` and connect to it.
 *
 * Decrements `active`, then copies ips[next]/resolved[next] into
 * servers[id]/domains[id], advancing `next` (wrapping to 0 after
 * MAXDOMAINS - 1), until is_valid_ip() accepts the address. Finally opens
 * the connection and increments total_domains.
 */
void get_domain_and_ip(int id)
{
    active--;

    do {
        strcpy(servers[id], ips[next]);
        strcpy(domains[id], resolved[next]);
        next = (next < (MAXDOMAINS - 1)) ? next + 1 : 0;
    } while (!is_valid_ip(servers[id]));

    make_socket_and_connect(id);
    total_domains++;
}
/*
 * Assign the next resolved host to connection slot `id` without connecting.
 *
 * Copies ips[next]/resolved[next] into servers[id]/domains[id], advancing
 * `next` (wrapping to 0 after MAXDOMAINS - 1), and repeats until
 * is_valid_ip() accepts the address.
 */
void get_domain_and_ip_without_connect(int id)
{
    do {
        strcpy(servers[id], ips[next]);
        strcpy(domains[id], resolved[next]);
        next = (next < (MAXDOMAINS - 1)) ? next + 1 : 0;
    } while (!is_valid_ip(servers[id]));
}
/*
 * Print a one-line progress report to stderr and increment `iterations`.
 *
 * Elapsed time is measured from startTime to now (stored in stopTime) with
 * millisecond resolution. Both rates are reported as 0 while no measurable
 * time has elapsed, which avoids dividing by zero on the first call.
 */
void get_time()
{
    clock_gettime(CLOCK_MONOTONIC, &stopTime);
    long long msElapsed = (stopTime.tv_nsec - startTime.tv_nsec) / 1000000
                        + (long long)(stopTime.tv_sec - startTime.tv_sec) * 1000;
    double seconds = (double)msElapsed / 1000.0;
    double domains_per_s = 0.0;
    double mbit_per_s = 0.0;

    if (seconds > 0.0) {
        domains_per_s = total_domains / seconds;
        /* bytes -> bits in floating point (no int overflow), then / 1024 / 1024. */
        mbit_per_s = 8.0 * total_bytes / seconds / 1024 / 1024;
    }

    iterations++;
    fprintf(stderr, "iterations=%d total domains=%d elapsed=%2.2fs domains/s=%2.2f KB=%d Mbit/s=%2.2f num_ready=%d count=%d active=%d end\r"
            , iterations, total_domains, seconds, domains_per_s, total_bytes/1024, mbit_per_s, num_ready, count, active);
}
/*
 * Send the not-yet-transmitted tail of slot `id`'s request on its socket.
 *
 * Returns whatever send() returns. The caller must ensure buffer_used[id]
 * does not exceed the request length.
 */
ssize_t send_data(int id)
{
    const char *request = get_buffer[id];
    size_t remaining = strlen(request) - buffer_used[id];

    return send(sockfd[id], request + buffer_used[id], remaining, 0);
}
/*
 * Read up to one full buffer of data from slot `id`'s socket into buffer[id].
 * Returns whatever recv() returns.
 */
ssize_t recv_data(int id)
{
    char *dst = buffer[id];

    return recv(sockfd[id], dst, sizeof(buffer[id]), 0);
}
/*
 * Block on the HTTP epoll instance until a socket is ready or TIMEOUT
 * expires, filling events[]. Returns the epoll_wait() result unchanged.
 *
 * NOTE(review): the name matches POSIX wait(2); this would conflict if
 * <sys/wait.h> were ever included.
 */
int wait()
{
    return epoll_wait(epfd, events, MAX_EPOLL_EVENTS, TIMEOUT/*timeout*/);
}
/*
 * Entry point: resolve the domains listed in argv[1] with c-ares and request
 * "/" from each over HTTP, using MAX_CONNECTIONS non-blocking sockets
 * multiplexed with epoll.
 *
 * Usage: <program> <file>, where <file> holds whitespace-separated domain names.
 *
 *  1. Initialise c-ares and the epoll instance used for its sockets.
 *  2. Resolve until MAX_CONNECTIONS lookups have succeeded.
 *  3. Open, register and connect one socket per connection slot.
 *  4. Loop: drain outstanding lookups, wait for socket readiness, send the
 *     request on writable sockets, read from readable ones, and hand a slot
 *     the next resolved host whenever its connection ends.
 *
 * Returns 0 after cleanup, 1 when c-ares or epoll setup fails, and exits with
 * a failure status on a missing argument, an unreadable file, too few
 * resolvable domains, or a socket/address error.
 */
int main(int argc, char *argv[]) {
    FILE *fp; /* domain list; NOTE: hides the file-scope fp */
    char domain[254];
    ares_channel channel;
    int status, dns_done = 0;
    int optmask;

    if (argc < 2) {
        fprintf(stderr, "usage: %s <domain-list-file>\n", argc > 0 ? argv[0] : "prog");
        return EXIT_FAILURE;
    }

    /* Ignore SIGPIPE so send() on a closed peer reports EPIPE instead of killing us. */
    sigaction(SIGPIPE, &(struct sigaction){SIG_IGN}, NULL);

    status = ares_library_init(ARES_LIB_INIT_ALL);
    if (status != ARES_SUCCESS) {
        printf("ares_library_init: %s\n", ares_strerror(status));
        return 1;
    }

    struct ares_options options = {
        .timeout = DNSTIMEOUT, /* set first query timeout */
        .tries = MAXTRIES /* set max. number of tries */
    };
    optmask = ARES_OPT_TIMEOUTMS | ARES_OPT_TRIES;
    status = ares_init_options(&channel, &options, optmask);
    if (status != ARES_SUCCESS) {
        printf("ares_init_options: %s\n", ares_strerror(status));
        return 1;
    }

    status = ares_set_servers_csv(channel, SERVERS);
    if (status != ARES_SUCCESS) {
        printf("ares_set_servers_csv: %s\n", ares_strerror(status));
        return 1;
    }

    memset(dns_client_fds, 0, sizeof(dns_client_fds));
    memset(&ev, 0, sizeof(ev));
    memset(dns_events, 0, sizeof(dns_events));

    epollfd = epoll_create(DNS_MAX_SERVERS);
    if (epollfd < 0) {
        perror("epoll_create");
        return 1;
    }

    fp = fopen(argv[1], "r");
    if (!fp) {
        perror(argv[1]);
        exit(EXIT_FAILURE);
    }

    /* Resolve until every connection slot can be given a host. */
    do {
        if (nwaiting >= MAXWAITING || dns_done) {
            do {
                wait_ares(channel);
            } while (nwaiting > MAXWAITING);
        }
        if (!dns_done) {
            if (fscanf(fp, "%253s", domain) == 1) {
                ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                nwaiting++;
            } else {
                dns_done = 1;
            }
        } else if (nwaiting == 0 && active < MAX_CONNECTIONS) {
            /* Input exhausted and nothing outstanding: `active` can no longer grow. */
            fprintf(stderr, "\nonly %d of the required %d domains resolved\n",
                    active, MAX_CONNECTIONS);
            exit(EXIT_FAILURE);
        }
    } while (active < MAX_CONNECTIONS);

    /*---Open sockets for streaming---*/
    for (i = 0; i < MAX_CONNECTIONS; i++) {
        if ((sockfd[i] = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0)) < 0) {
            perror("Socket");
            exit(errno);
        }
        count++;
    }

    /*---Add sockets to epoll---*/
    epfd = epoll_create1(0);
    if (epfd < 0) {
        perror("epoll_create1");
        return 1;
    }
    for (i = 0; i < MAX_CONNECTIONS; i++) {
        event[i].events = EPOLLIN | EPOLLOUT;
        event[i].data.fd = sockfd[i];
        epoll_ctl(epfd, EPOLL_CTL_ADD, sockfd[i], &event[i]);
    }

    /*---Initialize server address/port structs---*/
    for (i = 0; i < MAX_CONNECTIONS; i++) {
        get_domain_and_ip_without_connect(i);
        memset(&dest[i], 0, sizeof(dest[i]));
        dest[i].sin_family = AF_INET;
        dest[i].sin_port = htons(PORT);
        /* inet_pton() returns 0 for a malformed address without setting errno. */
        if (inet_pton(AF_INET, servers[i], &dest[i].sin_addr.s_addr) != 1) {
            fprintf(stderr, "%s: not a valid IPv4 address\n", servers[i]);
            exit(EXIT_FAILURE);
        }
    }

    /*---Connect to servers---*/
    for (i = 0; i < MAX_CONNECTIONS; i++) {
        buffer_used[i] = 0;
        if (connect(sockfd[i], (struct sockaddr*)&dest[i], sizeof(dest[i])) != 0
            && errno != EINPROGRESS) {
            perror("Connect ");
        }
    }

    clock_gettime(CLOCK_MONOTONIC, &startTime);

    /* NOTE(review): `done` is never set in this listing, so only a timed-out wait() ends the loop. */
    while (1) {
        /*---Do async DNS---*/
        while (active < MAXDOMAINS && nwaiting > 0) {
            if (nwaiting >= MAXWAITING || dns_done) {
                do {
                    wait_ares(channel);
                } while (nwaiting > MAXWAITING);
            }
            if (!dns_done) {
                if (fscanf(fp, "%253s", domain) == 1) {
                    ares_gethostbyname(channel, domain, AF_INET, callback, NULL);
                    nwaiting++;
                } else {
                    dns_done = 1;
                }
            }
        }

        /*---Wait to be able to send---*/
        num_ready = wait();
        get_time();
        if (!num_ready) {
            break; /* nothing became ready within TIMEOUT */
        }

        for (i = 0; i < num_ready; i++) {
            /* Map the ready descriptor back to its connection slot. */
            int index = -1;
            for (int j = 0; j < MAX_CONNECTIONS; j++) {
                if (events[i].data.fd == sockfd[j]) {
                    index = j;
                    break;
                }
            }
            if (index < 0) {
                continue; /* event for a descriptor no slot owns any more */
            }

            if (events[i].events & EPOLLOUT) {
                /* The Host header must name this slot's domain, not the event index. */
                snprintf(get_buffer[index], sizeof(get_buffer[index]),
                         "GET %s HTTP/1.1\r\nHost: %s\r\nUser-Agent: Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36\r\n\r\n", "/", domains[index]);
                size_t request_len = strlen(get_buffer[index]);
                ssize_t nByte = 0;
                if ((size_t)buffer_used[index] < request_len) {
                    nByte = send_data(index);
                }
                if (nByte > 0) {
                    buffer_used[index] += nByte;
                    total_bytes += nByte;
                }
                if (nByte == -1 && errno == EPIPE) {
                    /* Peer is gone: release the old socket before reusing the slot. */
                    close_socket(index);
                    get_domain_and_ip(index);
                    continue; /* remaining flags describe the closed connection */
                }
            }

            if (events[i].events & EPOLLIN) {
                memset(buffer[index], 0, MAXBUF);
                ssize_t nByte = recv_data(index);
                if (nByte > 0) {
                    total_bytes += nByte;
                }
                if (nByte == 0) {
                    /* Orderly shutdown by the peer: recycle the slot. */
                    close_socket(index);
                    if (!done) {
                        get_domain_and_ip(index);
                    }
                }
            }
        }

        get_time();
        if (done && count == 0) {
            break;
        }
    }

    ares_destroy(channel);
    ares_library_cleanup();
    fclose(fp);
    printf("\nFinished without errors\n");
    return 0;
}
https://stackoverflow.com/questions/70570379
复制相似问题