我正在创建一个小程序来抓取代理程序,它工作正常,唯一的缺点是它花费的时间太长,我试图使用并行方法来缩短时间,但是它仍然很慢,有什么方法可以加快这个过程吗?
Stopwatch stopwatch = new Stopwatch();
string proxy = "";
int x = 0;
Console.Title = "Scraped proxies: 0";
string apisUnParsed =
"http://proxydb.net/\nhttp://www.cybersyndrome.net/pla.html\nhttp://www.proxz.com/proxy_list_ca_0.html\nhttp://www.proxz.com/proxy_list_high_anonymous_0.html\nhttp://proxy.ipcn.org/proxylist2.html\nhttp://torvpn.com/proxylist.html\nhttp://www.proxz.com/proxy_list_anonymous_us_0.html\nhttp://www.proxz.com/proxy_list_cn_ssl_0.html\nhttp://www.proxz.com/proxy_list_jp_0.html\nhttp://www.proxz.com/proxy_list_uk_0.html\nhttp://dogdev.net/Proxy/US?port=80\nhttp://www.atomintersoft.com/products/alive-proxy/proxy-list/\nhttp://www.atomintersoft.com/anonymous_proxy_list\nhttp://www.proxz.com/proxy_list_fr_0.html\nhttp://www.atomintersoft.com/high_anonymity_elite_proxy_list\nhttp://dogdev.net/Proxy/all\nhttp://www.proxylists.net/\nhttp://www.httptunnel.ge/ProxyListForFree.aspx\nhttp://www.proxylists.net/proxylist.shtml?HTTP\nhttp://anon-proxy.ru/|html|0\nhttp://proxies.my-proxy.com/proxy-list-1.html\nhttp://globalproxies.blogspot.com/\nhttp://proxies.my-proxy.com/proxy-list-2.html\nhttp://anon-proxy.ru/\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxylists.net/http.txt\nhttp://aa8.narod.ru/index/0-9\nhttp://www.proxylists.net/http_highanon.txt\nhttp://proxylists.net/http.txt\nhttp://free-proxy-list.net/anonymous-proxy.html\nhttp://proxylists.net/http_highanon.txt\nhttp://ab57.ru/downloads/proxylist.txt\nhttp://www.us-proxy.org/\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt\nhttp://free-socks24.blogspot.in//\nhttp://globalproxies.blogspot.com/search/label/US%20Proxies\nhttp://freepremiumproxy.blogspot.com\nhttp://aa8.narod.ru/index/0-10\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php%3Ftype%3Dhttp\nhttp://rootjazz.com/proxies/proxies.txt\nhttps://chinaproxylist.wordpress.com/feed/\nhttp://sslproxies24.blogspot.nl/feeds/posts/default\nhttp://www.sslproxies24.top/feeds/posts/default\nhttp://proxy-heaven.blogspot.com/\nhttp://sslproxies24.blogspot.ca/feeds/posts/default\nhttp://aa8.narod.ru/index/0-8\nhttps://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://alexa.lr2b.com/proxylist.txt\nhttp://absentius.narod.ru/\nhttps://autoproxyblog.wordpress.com/feed/\nhttp://www.changeips.com/\nhttp://mmm-downloads.at.ua/blog\nhttp://feeds.feedburner.com/AnonymousDailyProxyList\nhttp://freeproxylistsdaily.blogspot.in/feeds/posts/default\nhttp://proxyserverlist-24.blogspot.com/feeds/posts/default\nhttp://proxy-hunter.blogspot.com/feeds/posts/default\nhttps://proxy50-50.blogspot.com/\nhttp://free-fresh-proxy-daily.blogspot.com/feeds/posts/default\nhttp://rootjazz.com/proxies/proxies.txt\nhttp://www.live-socks.net/feeds/posts/default\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxyserverlist24.top/feeds/posts/default\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=http\nhttps://free-proxy-list.net/\nhttps://proxy-spider.com/api/proxies.example.txt\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=socks\"\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt\nhttps://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt\nhttps://proxysource.org/api/proxies/getWorkingProxies?apiToken=17580e4438910c287cef15dca10b7912a26&latencyMax=10000&latencyMin=0&outputMode=plaintext\nhttp://spys.me/proxy.txt\nhttps://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5&https=true\nhttps://www.proxy-list.download/api/v1/get?type=http\nhttps://www.proxy-list.download/api/v1/get?type=https\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt\nhttp://pubproxy.com/api/proxy?type=socks4&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks4\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt\nhttp://pubproxy.com/api/proxy?type=socks5&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks5\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all&anonymity=all";
string[] APIS = apisUnParsed.Split('\n');
List<string> proxiesScraped = new List<string>();
WebClient connect = new WebClient();
connect.Headers["User-Agent"] =
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)";
stopwatch.Start();
for (int i = 0; i < APIS.Length; i++)
{
Console.WriteLine($"\r\nTHIS API IS: {APIS[i]}");
try
{
proxy += connect.DownloadString(APIS[i]);
Parallel.For(0, Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+").Count, new ParallelOptions { MaxDegreeOfParallelism = Convert.ToInt32(Math.Ceiling((Environment.ProcessorCount * 0.75) * 3.0)) }, j =>
{
Console.WriteLine(Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+")[j]);
proxiesScraped.Add(Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+")[j].ToString());
x++;
Console.Title = $"Scraped proxies: {x.ToString()}";
});
proxy = "";
}
catch (Exception e)
{
}
}
stopwatch.Stop();
Console.WriteLine(stopwatch.Elapsed.TotalSeconds);
Console.ReadLine();
发布于 2022-07-04 11:46:04
这需要大约2500毫秒才能在我的机器上执行。
private static async Task Main()
{
var proxyUrlToParse =
"http://proxydb.net/\nhttp://www.cybersyndrome.net/pla.html\nhttp://www.proxz.com/proxy_list_ca_0.html\nhttp://www.proxz.com/proxy_list_high_anonymous_0.html\nhttp://proxy.ipcn.org/proxylist2.html\nhttp://torvpn.com/proxylist.html\nhttp://www.proxz.com/proxy_list_anonymous_us_0.html\nhttp://www.proxz.com/proxy_list_cn_ssl_0.html\nhttp://www.proxz.com/proxy_list_jp_0.html\nhttp://www.proxz.com/proxy_list_uk_0.html\nhttp://dogdev.net/Proxy/US?port=80\nhttp://www.atomintersoft.com/products/alive-proxy/proxy-list/\nhttp://www.atomintersoft.com/anonymous_proxy_list\nhttp://www.proxz.com/proxy_list_fr_0.html\nhttp://www.atomintersoft.com/high_anonymity_elite_proxy_list\nhttp://dogdev.net/Proxy/all\nhttp://www.proxylists.net/\nhttp://www.httptunnel.ge/ProxyListForFree.aspx\nhttp://www.proxylists.net/proxylist.shtml?HTTP\nhttp://anon-proxy.ru/|html|0\nhttp://proxies.my-proxy.com/proxy-list-1.html\nhttp://globalproxies.blogspot.com/\nhttp://proxies.my-proxy.com/proxy-list-2.html\nhttp://anon-proxy.ru/\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxylists.net/http.txt\nhttp://aa8.narod.ru/index/0-9\nhttp://www.proxylists.net/http_highanon.txt\nhttp://proxylists.net/http.txt\nhttp://free-proxy-list.net/anonymous-proxy.html\nhttp://proxylists.net/http_highanon.txt\nhttp://ab57.ru/downloads/proxylist.txt\nhttp://www.us-proxy.org/\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt\nhttp://free-socks24.blogspot.in//\nhttp://globalproxies.blogspot.com/search/label/US%20Proxies\nhttp://freepremiumproxy.blogspot.com\nhttp://aa8.narod.ru/index/0-10\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php%3Ftype%3Dhttp\nhttp://rootjazz.com/proxies/proxies.txt\nhttps://chinaproxylist.wordpress.com/feed/\nhttp://sslproxies24.blogspot.nl/feeds/posts/default\nhttp://www.sslproxies24.top/feeds/posts/default\nhttp://proxy-heaven.blogspot.com/\nhttp://sslproxies24.blogspot.ca/feeds/posts/default\nhttp://aa8.narod.ru/index/0-8\nhttps://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://alexa.lr2b.com/proxylist.txt\nhttp://absentius.narod.ru/\nhttps://autoproxyblog.wordpress.com/feed/\nhttp://www.changeips.com/\nhttp://mmm-downloads.at.ua/blog\nhttp://feeds.feedburner.com/AnonymousDailyProxyList\nhttp://freeproxylistsdaily.blogspot.in/feeds/posts/default\nhttp://proxyserverlist-24.blogspot.com/feeds/posts/default\nhttp://proxy-hunter.blogspot.com/feeds/posts/default\nhttps://proxy50-50.blogspot.com/\nhttp://free-fresh-proxy-daily.blogspot.com/feeds/posts/default\nhttp://rootjazz.com/proxies/proxies.txt\nhttp://www.live-socks.net/feeds/posts/default\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxyserverlist24.top/feeds/posts/default\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=http\nhttps://free-proxy-list.net/\nhttps://proxy-spider.com/api/proxies.example.txt\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=socks\"\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt\nhttps://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt\nhttps://proxysource.org/api/proxies/getWorkingProxies?apiToken=17580e4438910c287cef15dca10b7912a26&latencyMax=10000&latencyMin=0&outputMode=plaintext\nhttp://spys.me/proxy.txt\nhttps://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5&https=true\nhttps://www.proxy-list.download/api/v1/get?type=http\nhttps://www.proxy-list.download/api/v1/get?type=https\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt\nhttp://pubproxy.com/api/proxy?type=socks4&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks4\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt\nhttp://pubproxy.com/api/proxy?type=socks5&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks5\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all&anonymity=all";
string[] urls = proxyUrlToParse.Split('\n');
var blockingCollection = new BlockingCollection<string>();
var client = new HttpClient();
var sw = Stopwatch.StartNew();
IEnumerable<Task> producerTasks = urls.Select(url => Task.Run(async () =>
{
try
{
blockingCollection.Add(await client.GetStringAsync(url));
}
catch (Exception e)
{
//Console.WriteLine(e.Message);
}
})).ToArray();
Task<List<string>> consumerTask = Task.Run(() => blockingCollection
.GetConsumingEnumerable()
.AsParallel()
.WithMergeOptions(ParallelMergeOptions.NotBuffered)
.WithDegreeOfParallelism(3)
.SelectMany(c =>
{
return Regex.Matches(c, @"\d+\.\d+\.\d+\.\d+:\d+").Select(m => m.Value);
}).ToList());
await Task.WhenAll(producerTasks);
blockingCollection.CompleteAdding();
List<string> proxies = await consumerTask;
Console.WriteLine($"Completed fetching {proxies.Count} proxies in {sw.ElapsedMilliseconds}ms");
}
https://stackoverflow.com/questions/72846872
复制相似问题