提高刮板速度
我正在创建一个小程序来刮擦代理,它可以正常工作,唯一的缺点是它花费了太长时间,我试图使用平行的时间来缩短时间,但是它仍然很慢,是否有任何方法可以加快流程的速度?
Stopwatch stopwatch = new Stopwatch();
string proxy = "";
int x = 0;
Console.Title = "Scraped proxies: 0";
string apisUnParsed =
"http://proxydb.net/\nhttp://www.cybersyndrome.net/pla.html\nhttp://www.proxz.com/proxy_list_ca_0.html\nhttp://www.proxz.com/proxy_list_high_anonymous_0.html\nhttp://proxy.ipcn.org/proxylist2.html\nhttp://torvpn.com/proxylist.html\nhttp://www.proxz.com/proxy_list_anonymous_us_0.html\nhttp://www.proxz.com/proxy_list_cn_ssl_0.html\nhttp://www.proxz.com/proxy_list_jp_0.html\nhttp://www.proxz.com/proxy_list_uk_0.html\nhttp://dogdev.net/Proxy/US?port=80\nhttp://www.atomintersoft.com/products/alive-proxy/proxy-list/\nhttp://www.atomintersoft.com/anonymous_proxy_list\nhttp://www.proxz.com/proxy_list_fr_0.html\nhttp://www.atomintersoft.com/high_anonymity_elite_proxy_list\nhttp://dogdev.net/Proxy/all\nhttp://www.proxylists.net/\nhttp://www.httptunnel.ge/ProxyListForFree.aspx\nhttp://www.proxylists.net/proxylist.shtml?HTTP\nhttp://anon-proxy.ru/|html|0\nhttp://proxies.my-proxy.com/proxy-list-1.html\nhttp://globalproxies.blogspot.com/\nhttp://proxies.my-proxy.com/proxy-list-2.html\nhttp://anon-proxy.ru/\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxylists.net/http.txt\nhttp://aa8.narod.ru/index/0-9\nhttp://www.proxylists.net/http_highanon.txt\nhttp://proxylists.net/http.txt\nhttp://free-proxy-list.net/anonymous-proxy.html\nhttp://proxylists.net/http_highanon.txt\nhttp://ab57.ru/downloads/proxylist.txt\nhttp://www.us-proxy.org/\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt\nhttp://free-socks24.blogspot.in//\nhttp://globalproxies.blogspot.com/search/label/US%20Proxies\nhttp://freepremiumproxy.blogspot.com\nhttp://aa8.narod.ru/index/0-10\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php%3Ftype%3Dhttp\nhttp://rootjazz.com/proxies/proxies.txt\nhttps://chinaproxylist.wordpress.com/feed/\nhttp://sslproxies24.blogspot.nl/feeds/posts/default\nhttp://www.sslproxies24.top/feeds/posts/default\nhttp://proxy-heaven.blogspot.com/\nhttp://sslproxies24.blogspot.ca/feeds/posts/default\nhttp://aa8.narod.ru/index/0-8\nhttps://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://alexa.lr2b.com/proxylist.txt\nhttp://absentius.narod.ru/\nhttps://autoproxyblog.wordpress.com/feed/\nhttp://www.changeips.com/\nhttp://mmm-downloads.at.ua/blog\nhttp://feeds.feedburner.com/AnonymousDailyProxyList\nhttp://freeproxylistsdaily.blogspot.in/feeds/posts/default\nhttp://proxyserverlist-24.blogspot.com/feeds/posts/default\nhttp://proxy-hunter.blogspot.com/feeds/posts/default\nhttps://proxy50-50.blogspot.com/\nhttp://free-fresh-proxy-daily.blogspot.com/feeds/posts/default\nhttp://rootjazz.com/proxies/proxies.txt\nhttp://www.live-socks.net/feeds/posts/default\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxyserverlist24.top/feeds/posts/default\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=http\nhttps://free-proxy-list.net/\nhttps://proxy-spider.com/api/proxies.example.txt\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=socks\"\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt\nhttps://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt\nhttps://proxysource.org/api/proxies/getWorkingProxies?apiToken=17580e4438910c287cef15dca10b7912a26&latencyMax=10000&latencyMin=0&outputMode=plaintext\nhttp://spys.me/proxy.txt\nhttps://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5&https=true\nhttps://www.proxy-list.download/api/v1/get?type=http\nhttps://www.proxy-list.download/api/v1/get?type=https\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt\nhttp://pubproxy.com/api/proxy?type=socks4&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks4\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt\nhttp://pubproxy.com/api/proxy?type=socks5&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks5\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all&anonymity=all";
string[] APIS = apisUnParsed.Split('\n');
List<string> proxiesScraped = new List<string>();
WebClient connect = new WebClient();
connect.Headers["User-Agent"] =
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)";
stopwatch.Start();
for (int i = 0; i < APIS.Length; i++)
{
Console.WriteLine($"\r\nTHIS API IS: {APIS[i]}");
try
{
proxy += connect.DownloadString(APIS[i]);
Parallel.For(0, Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+").Count, new ParallelOptions { MaxDegreeOfParallelism = Convert.ToInt32(Math.Ceiling((Environment.ProcessorCount * 0.75) * 3.0)) }, j =>
{
Console.WriteLine(Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+")[j]);
proxiesScraped.Add(Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+")[j].ToString());
x++;
Console.Title = $"Scraped proxies: {x.ToString()}";
});
proxy = "";
}
catch (Exception e)
{
}
}
stopwatch.Stop();
Console.WriteLine(stopwatch.Elapsed.TotalSeconds);
Console.ReadLine();
I am creating a small program to scrape proxies, it works correctly, the only drawback is that it takes too long, I have tried to use parallel to shorten the time but it is still very slow, is there any way to speed up the process?
Stopwatch stopwatch = new Stopwatch();
string proxy = "";
int x = 0;
Console.Title = "Scraped proxies: 0";
string apisUnParsed =
"http://proxydb.net/\nhttp://www.cybersyndrome.net/pla.html\nhttp://www.proxz.com/proxy_list_ca_0.html\nhttp://www.proxz.com/proxy_list_high_anonymous_0.html\nhttp://proxy.ipcn.org/proxylist2.html\nhttp://torvpn.com/proxylist.html\nhttp://www.proxz.com/proxy_list_anonymous_us_0.html\nhttp://www.proxz.com/proxy_list_cn_ssl_0.html\nhttp://www.proxz.com/proxy_list_jp_0.html\nhttp://www.proxz.com/proxy_list_uk_0.html\nhttp://dogdev.net/Proxy/US?port=80\nhttp://www.atomintersoft.com/products/alive-proxy/proxy-list/\nhttp://www.atomintersoft.com/anonymous_proxy_list\nhttp://www.proxz.com/proxy_list_fr_0.html\nhttp://www.atomintersoft.com/high_anonymity_elite_proxy_list\nhttp://dogdev.net/Proxy/all\nhttp://www.proxylists.net/\nhttp://www.httptunnel.ge/ProxyListForFree.aspx\nhttp://www.proxylists.net/proxylist.shtml?HTTP\nhttp://anon-proxy.ru/|html|0\nhttp://proxies.my-proxy.com/proxy-list-1.html\nhttp://globalproxies.blogspot.com/\nhttp://proxies.my-proxy.com/proxy-list-2.html\nhttp://anon-proxy.ru/\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxylists.net/http.txt\nhttp://aa8.narod.ru/index/0-9\nhttp://www.proxylists.net/http_highanon.txt\nhttp://proxylists.net/http.txt\nhttp://free-proxy-list.net/anonymous-proxy.html\nhttp://proxylists.net/http_highanon.txt\nhttp://ab57.ru/downloads/proxylist.txt\nhttp://www.us-proxy.org/\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list.txt\nhttp://free-socks24.blogspot.in//\nhttp://globalproxies.blogspot.com/search/label/US%20Proxies\nhttp://freepremiumproxy.blogspot.com\nhttp://aa8.narod.ru/index/0-10\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php%3Ftype%3Dhttp\nhttp://rootjazz.com/proxies/proxies.txt\nhttps://chinaproxylist.wordpress.com/feed/\nhttp://sslproxies24.blogspot.nl/feeds/posts/default\nhttp://www.sslproxies24.top/feeds/posts/default\nhttp://proxy-heaven.blogspot.com/\nhttp://sslproxies24.blogspot.ca/feeds/posts/default\nhttp://aa8.narod.ru/index/0-8\nhttps://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://free-socks24.blogspot.in/feeds/posts/default?alt=rss\nhttp://alexa.lr2b.com/proxylist.txt\nhttp://absentius.narod.ru/\nhttps://autoproxyblog.wordpress.com/feed/\nhttp://www.changeips.com/\nhttp://mmm-downloads.at.ua/blog\nhttp://feeds.feedburner.com/AnonymousDailyProxyList\nhttp://freeproxylistsdaily.blogspot.in/feeds/posts/default\nhttp://proxyserverlist-24.blogspot.com/feeds/posts/default\nhttp://proxy-hunter.blogspot.com/feeds/posts/default\nhttps://proxy50-50.blogspot.com/\nhttp://free-fresh-proxy-daily.blogspot.com/feeds/posts/default\nhttp://rootjazz.com/proxies/proxies.txt\nhttp://www.live-socks.net/feeds/posts/default\nhttp://www.socks24.org/feeds/posts/default\nhttp://www.proxyserverlist24.top/feeds/posts/default\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=http\nhttps://free-proxy-list.net/\nhttps://proxy-spider.com/api/proxies.example.txt\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php?type=socks\"\nhttp://proxysearcher.sourceforge.net/Proxy%20List.php\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks4.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks4.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/socks5.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/http.txt\nhttps://raw.githubusercontent.com/ShiftyTR/Proxy-List/master/socks5.txt\nhttps://raw.githubusercontent.com/hookzof/socks5_list/master/proxy.txt\nhttps://raw.githubusercontent.com/TheSpeedX/SOCKS-List/master/http.txt\nhttps://proxysource.org/api/proxies/getWorkingProxies?apiToken=17580e4438910c287cef15dca10b7912a26&latencyMax=10000&latencyMin=0&outputMode=plaintext\nhttp://spys.me/proxy.txt\nhttps://api.proxyscrape.com/?request=getproxies&proxytype=all&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/clarketm/proxy-list/master/proxy-list-raw.txt\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/http.txt\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5\nhttp://pubproxy.com/api/proxy?type=http&format=txt&limit=5&https=true\nhttps://www.proxy-list.download/api/v1/get?type=http\nhttps://www.proxy-list.download/api/v1/get?type=https\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=http&timeout=10000&country=all&ssl=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks4.txt\nhttp://pubproxy.com/api/proxy?type=socks4&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks4\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks4&timeout=10000&country=all&anonymity=all\nhttps://raw.githubusercontent.com/TheSpeedX/PROXY-List/master/socks5.txt\nhttp://pubproxy.com/api/proxy?type=socks5&format=txt&limit=5\nhttps://www.proxy-list.download/api/v1/get?type=socks5\nhttps://api.proxyscrape.com/v2/?request=displayproxies&protocol=socks5&timeout=10000&country=all&anonymity=all";
string[] APIS = apisUnParsed.Split('\n');
List<string> proxiesScraped = new List<string>();
WebClient connect = new WebClient();
connect.Headers["User-Agent"] =
"Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 5.1)";
stopwatch.Start();
for (int i = 0; i < APIS.Length; i++)
{
Console.WriteLine(quot;\r\nTHIS API IS: {APIS[i]}");
try
{
proxy += connect.DownloadString(APIS[i]);
Parallel.For(0, Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+").Count, new ParallelOptions { MaxDegreeOfParallelism = Convert.ToInt32(Math.Ceiling((Environment.ProcessorCount * 0.75) * 3.0)) }, j =>
{
Console.WriteLine(Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+")[j]);
proxiesScraped.Add(Regex.Matches(proxy, @"\d+\.\d+\.\d+\.\d+:\d+")[j].ToString());
x++;
Console.Title = quot;Scraped proxies: {x.ToString()}";
});
proxy = "";
}
catch (Exception e)
{
}
}
stopwatch.Stop();
Console.WriteLine(stopwatch.Elapsed.TotalSeconds);
Console.ReadLine();
如果你对这篇内容有疑问,欢迎到本站社区发帖提问 参与讨论,获取更多帮助,或者扫码二维码加入 Web 技术交流群。

绑定邮箱获取回复消息
由于您还没有绑定你的真实邮箱,如果其他用户或者作者回复了您的评论,将不能在第一时间通知您!
发布评论
评论(1)
这需要大约2500毫秒才能在我的计算机上执行。
This takes around 2500ms to execute on my machine.