AutohotKey 示例
; AutoHotkey 支持 http
;
; 米扑代理示例:
; https://proxy.mimvp.com/demo
;
; 米扑代理购买:
; https://proxy.mimvp.com
;
; mimvp.com
; 2016-11-29
;
; AutoHotkey 安装与开发,请参考米扑博客,附有源代码:
; https://blog.mimvp.com/article/19337.html
; ========== 代理设置方式1 ==========
URL := "http://proxy.mimvp.com/exist"
RESULT_FILE := "result_curl.txt"
MIMVP_PROXY := "138.68.165.154:3128"
Runcurl = curl.exe "%URL%" -x "%MIMVP_PROXY%" -o "%RESULT_FILE%"
;Runcurl = curl.exe "%URL%" -x user:password@proxyhost:port -o "%RESULT_FILE%"
Runwait, %comspec% /c %RunCurl%, , Hide
; ========== 代理设置方式2 ==========
MIMVP_PROXY_NOAUTH := 2
MIMVP_PROXY_AUTH := 1
;~ 代理服务器
MIMVP_PROXY := "138.68.165.154:3128"
;~ 要访问的目标页面
mimvp_url := "http://proxy.mimvp.com/exist"
mimvp_url2 = "https://proxy.mimvp.com/exist"
whr := ComObjCreate("WinHttp.WinHttpRequest.5.1")
whr.SetTimeouts(30000,30000,30000,30000) ;~ Set timeouts to 30 seconds
whr.Open("GET", mimvp_url, true)
whr.SetRequestHeader("User-Agent", "curl/7.41.0") ;~ 模拟curl的ua
;~ 设置代理服务器
whr.SetProxy(MIMVP_PROXY_NOAUTH, MIMVP_PROXY)
;~ 设置代理隧道验证信息
;whr.SetCredentials('mimvp-user', 'mimvp-pass', MIMVP_PROXY_AUTH)
whr.Send()
whr.WaitForResponse()
MsgBox % whr.ResponseText ; 输入到消息框,网页内容太长则显示不完整
; 打开对话框选择文件,写入完整的网页内容
FileSelectFile, resultName, S16,, Create a new file:
if (resultName = "")
return
outFile := FileOpen(resultName , "w" , "utf-8")
if !IsObject(outFile)
{
MsgBox , 不能打开文件: %resultName%
return
}
outFile.write(whr.ResponseText)
outFile.Close()
C# 示例
/**
C# 支持 http、https
米扑代理示例:
https://proxy.mimvp.com/demo
米扑代理购买:
https://proxy.mimvp.com
mimvp.com
2015-11-09
*/
using System;
using System.IO;
using System.Net;
using System.Text;
namespace ConsoleApplication1
{
class Program
{
// 代码级代理
public static void Main(string[] args)
{
System.Net.WebProxy proxy = new WebProxy("218.21.230.156", 4435); // "107.150.96.188", 8080
HttpWebRequest request = (HttpWebRequest)WebRequest.Create("https://proxy.mimvp.com/ip");
request.Proxy = proxy;
using (WebResponse response = request.GetResponse())
{
using (TextReader reader = new StreamReader(response.GetResponseStream()))
{
string line;
while ((line = reader.ReadLine()) != null)
Console.WriteLine(line);
}
}
}
// 系统变量级代理
public static void Main2()
{
// your code goes here
System.Net.WebProxy proxy = new WebProxy("107.150.96.188", 8080);
System.Net.HttpWebRequest request = (HttpWebRequest)WebRequest.Create("https://proxy.mimvp.com/ip");
request.Proxy = proxy;
request.Timeout = 30 * 1000;
System.Net.HttpWebResponse resp = (HttpWebResponse)request.GetResponse();
Encoding bin = Encoding.GetEncoding("UTF-8");
StreamReader reader = new StreamReader(resp.GetResponseStream(), bin);
string str = reader.ReadToEnd();
Console.WriteLine(str);
reader.Close();
reader.Dispose();
}
// 系统变量级代理密码授权 (感谢网友贡献代码:1725242984@qq.com)
public static void Main3()
{
// 要访问的目标页面
string targetUrl = "https://proxy.mimvp.com/ip";
// 代理ip和隧道验证密码
// 获取用户名密码请见米扑代理 - 会员中心:https://proxy.mimvp.com/usercenter/?p=whiteip
string proxyHost = "107.150.96.188";
string proxyPort = "8080";
string proxyUser = "mimvp-user";
string proxyPass = "mimvp-pass";
// 设置代理服务器
WebProxy proxy = new WebProxy(string.Format("{0}:{1}", proxyHost, proxyPort), true);
ServicePointManager.Expect100Continue = false;
var request = WebRequest.Create(targetUrl) as HttpWebRequest;
request.AllowAutoRedirect = true;
request.KeepAlive = true;
request.Method = "GET";
request.Proxy = proxy;
request.Proxy.Credentials = new System.Net.NetworkCredential(proxyUser, proxyPass);
using (var response = request.GetResponse() as HttpWebResponse)
using (var sr = new StreamReader(response.GetResponseStream(), Encoding.UTF8))
{
string htmlStr = sr.ReadToEnd();
}
}
}
}
Delphi 示例
# Delphi 支持 http
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2016-11-25
interface
uses
Windows, Messages, SysUtils, Variants, Classes, Graphics, Controls, Forms,
Dialogs, StdCtrls, IDHTTP;
type
TForm1 = class(TForm)
Button1: TButton;
procedure Button1Click(Sender: TObject);
private
{ Private declarations }
public
{ Public declarations }
end;
var
Form1: TForm1;
implementation
{$R * .dfm}
procedure TForm1.Button1Click(Sender: TObject);
const
// 要访问的目标页面
mimvp_url = "http://proxy.mimvp.com/exist"
mimvp_url2 = "https://proxy.mimvp.com/exist"
mimvp_url3 = "https://apps.bdimg.com/libs/jquery-i18n/1.1.1/jquery.i18n.min.js"
// 代理服务器
proxy_ip = '138.68.165.154';
proxy_port = 3128;
var
IDHTTP1 : TIDHTTP;
begin
Application.ProcessMessages;
IDHTTP1 : = TIDHTTP.Create(nil);
with IDHTTP1 do
begin
AllowCookies : = True;
HandleRedirects : = True;
ProxyParams.BasicAuthentication : = True;
ProxyParams.ProxyServer : = proxy_ip;
ProxyParams.ProxyPort : = proxy_port;
# ProxyParams.ProxyUsername : = 'mimvp-user';
# ProxyParams.ProxyPassword : = 'mimvp-pass';
Request.Method : = 'GET';
Request.Accept : = 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8';
Request.AcceptEncoding : = 'gzip, deflate, sdch';
Request.AcceptLanguage : = 'zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4';
Request.Connection : = 'keep-alive';
Request.UserAgent : = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36';
IDHTTP1.Get(mimvp_url);
end;
end;
end.
Go 示例
/**
* go 支持 http、socks5
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2017.6.20
*/
package main
import (
"fmt"
"io/ioutil"
"log"
"net/http"
"net/url"
"os"
"golang.org/x/net/proxy"
)
// http(方法1)
func main_1(proxy_uri string, mimvp_url string) {
proxy := func(_ *http.Request) (*url.URL, error) {
return url.Parse(proxy_uri)
}
transport := &http.Transport{Proxy: proxy}
client := &http.Client{Transport: transport}
resp, err := client.Get(mimvp_url)
if err != nil {
fmt.Println("error : ", err)
return
} else {
defer resp.Body.Close()
body, _ := ioutil.ReadAll(resp.Body)
fmt.Printf("%s\n", body)
}
}
// http(方法2)
func main_2(proxy_uri string, mimvp_url string) {
url_i := url.URL{}
url_proxy, _ := url_i.Parse(proxy_uri)
transport := &http.Transport{Proxy: http.ProxyURL(url_proxy)}
client := http.Client{Transport: transport}
resp, err := client.Get(mimvp_url)
if err != nil {
log.Fatalln(err)
} else {
defer resp.Body.Close()
body, _ := ioutil.ReadAll(resp.Body)
fmt.Printf("%s\n", body)
}
}
// config environment varable
func main_22(proxy_uri string, mimvp_url string) {
// url_i := url.URL{}
// url_proxy, _ := url_i.Parse("https://127.0.0.1:9743")
os.Setenv("HTTP_PROXY", "http://125.77.25.124:80")
os.Setenv("HTTPS_PROXY", "https://210.209.89.100:8081")
c := http.Client{ /* Transport: &http.Transport{ // Proxy: http.ProxyURL(url_proxy)} */ }
resp, err := c.Get("http://mimvp.com")
if err != nil {
log.Fatalln(err)
} else {
defer resp.Body.Close()
body, _ := ioutil.ReadAll(resp.Body)
fmt.Printf("%s\n", body)
}
}
// 指定代理ip
func getTransportFieldURL(proxy_addr *string) (transport *http.Transport) {
url_i := url.URL{}
url_proxy, _ := url_i.Parse(*proxy_addr)
transport = &http.Transport{Proxy: http.ProxyURL(url_proxy)}
return
}
// 从环境变量$http_proxy或$HTTP_PROXY中获取HTTP代理地址
// Linux 设置代理环境变量命令:
// http_proxy=http://125.77.25.124:80
// https_proxy=https://210.209.89.100:8081
func getTransportFromEnvironment() (transport *http.Transport) {
transport = &http.Transport{Proxy: http.ProxyFromEnvironment}
return
}
func fetch(mimvp_url, proxy_uri *string) (html string) {
transport := getTransportFieldURL(proxy_uri)
client := &http.Client{Transport: transport}
req, err := http.NewRequest("GET", *mimvp_url, nil)
req.Header.Set("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36")
if err != nil {
log.Fatal(err.Error())
}
resp, err := client.Do(req)
if err != nil {
log.Fatal(err.Error())
}
if resp.StatusCode == 200 {
robots, err := ioutil.ReadAll(resp.Body)
resp.Body.Close()
if err != nil {
log.Fatal(err.Error())
}
html = string(robots)
} else {
html = ""
}
return
}
// http(方法3)
func main_3(proxy_uri string, mimvp_url string) {
html := fetch(&mimvp_url, &proxy_uri)
fmt.Println(html)
}
// socks5
// 1. download https://github.com/golang/net
// 2. unzip and move to /usr/local/go/src/golang.org/x/net/
// 3. import ( "golang.org/x/net/proxy" )
func main_socks5(proxy_socks string, mimvp_url string) {
dialer, err := proxy.SOCKS5("tcp", proxy_socks, nil, proxy.Direct)
if err != nil {
fmt.Println(os.Stderr, "can't connect to socks5 proxy:", err)
os.Exit(1)
}
// setup a http client
// transport := &http.Transport{}
// transport.Dial = dialer.Dial
transport := &http.Transport{Dial: dialer.Dial}
client := &http.Client{Transport: transport}
resp, err := client.Get(mimvp_url)
if err != nil {
log.Fatalln(err)
} else {
defer resp.Body.Close()
body, _ := ioutil.ReadAll(resp.Body)
fmt.Printf("%s\n", body)
}
}
func main() {
// http
proxy_uri := "http://125.77.25.124:80"
mimvp_url := "http://proxy.mimvp.com/exist"
main_1(proxy_uri, mimvp_url)
main_2(proxy_uri, mimvp_url)
main_3(proxy_uri, mimvp_url)
// socks5
proxy_socks := "175.138.65.244:1080"
main_socks5(proxy_socks, mimvp_url)
}
// 执行命令:
// $ go build mimvp-proxy-go.go
// $ ./mimvp-proxy-go
Groovy 示例
/**
* Groovy 支持 http
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2017-07-18
*
* Groovy 爬取网页示例:
* https://blog.mimvp.com/article/19213.html
*/
@Grab(group='org.codehaus.groovy.modules.http-builder', module='http-builder', version='0.7.1' )
import groovyx.net.http.HTTPBuilder
import static groovyx.net.http.ContentType.*
import static groovyx.net.http.Method.*
import org.apache.http.auth.*
class MimvpSpider {
static def proxy_http = "http://208.92.93.218:1080"
static def mimvp_url = "http://proxy.mimvp.com/exist"
static def mimvp_url2 = "https://proxy.mimvp.com/exist"
static main(args) {
spider_proxy(MimvpSpider.mimvp_url, MimvpSpider.proxy_http) // http
spider_proxy(MimvpSpider.mimvp_url2, MimvpSpider.proxy_http) // https
}
static spider_proxy(mimvp_url, proxy) {
def http = new HTTPBuilder(mimvp_url)
// http.client.getCredentialsProvider().setCredentials(
// new AuthScope("myproxy.com", 8080),
// new UsernamePasswordCredentials("proxy-username", "proxy-password")
// )
def proxy_type = proxy.split("://")[0]
def proxy_ip = proxy.split("://")[1].split(":")[0]
def proxy_port = proxy.split("://")[1].split(":")[1]
proxy_port = proxy_port.toInteger()
println proxy_type
println proxy_ip
println proxy_port
// http.setProxy('myproxy.com', 8080, 'http')
http.setProxy(proxy_ip, proxy_port, proxy_type)
http.ignoreSSLIssues() // 访问https网站,不需验证, module='http-builder', version='0.7.1'
http.request( GET, TEXT ){ req ->
response.success = { resp, reader ->
println "Response: ${reader.text}"
}
}
}
}
HTTPie 示例
#!/bin/bash
#
# http 支持 http、https
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2015-11-10
# http代理格式 --proxy http:IP:Port
# https代理格式 --proxy https:IP:Port
$ http --help
usage: http [--json] [--form] [--pretty {all,colors,format,none}]
[--style STYLE] [--print WHAT] [--headers] [--body] [--verbose]
[--all] [--history-print WHAT] [--stream] [--output FILE]
[--download] [--continue]
[--session SESSION_NAME_OR_PATH | --session-read-only SESSION_NAME_OR_PATH]
[--auth USER[:PASS]] [--auth-type {basic,digest}]
[--proxy PROTOCOL:PROXY_URL] [--follow]
[--max-redirects MAX_REDIRECTS] [--timeout SECONDS]
[--check-status] [--verify VERIFY]
[--ssl {ssl2.3,ssl3,tls1,tls1.1,tls1.2}] [--cert CERT]
[--cert-key CERT_KEY] [--ignore-stdin] [--help] [--version]
[--traceback] [--default-scheme DEFAULT_SCHEME] [--debug]
[METHOD] URL [REQUEST_ITEM [REQUEST_ITEM ...]]
# http
http --proxy "http:217.107.197.174:8081" http://proxy.mimvp.com/exist
# https
http --verify no --proxy "https:46.105.214.133:3128" https://proxy.mimvp.com/exist
Java 示例
/**
* Java 支持 http、https、socks4、socks5
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2015-11-09
*/
package com.mimvp;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.Authenticator;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.net.Socket;
import java.net.URL;
import java.net.URLConnection;
import java.security.KeyManagementException;
import java.security.NoSuchAlgorithmException;
import java.security.cert.X509Certificate;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Properties;
import javax.net.ssl.HostnameVerifier;
import javax.net.ssl.HttpsURLConnection;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLSession;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
public class MimvpProxyJava {
@SuppressWarnings({ "serial" })
public static HashMap proxyMap = new HashMap() {
{
put("http", "138.68.161.14:3128");
put("https", "61.216.1.23:3128");
put("socks4", "115.238.247.205:1080");
put("socks5", "122.146.58.135:16405");
}
};
public static String PROXY_USERNAME = "username";
public static String PROXY_PASSWORD = "password";
@SuppressWarnings({ "serial" })
public static HashMap proxyAuthMap = new HashMap() {
{
put("http", "120.24.177.37:6474");
put("https", "120.24.177.37:6474");
put("socks5", "120.24.177.37:6476");
}
};
final static String proxyUrl = "http://proxy.mimvp.com/test_proxy2"; // http
final static String proxyUrl2 = "https://proxy.mimvp.com/test_proxy2"; // https
// 全局禁止ssl证书验证,防止访问非验证的https网址无法访问,例如:https://mimvp.com
static {
disableSslVerification();
}
private static void disableSslVerification() {
try {
// Create a trust manager that does not validate certificate chains
TrustManager[] trustAllCerts = new TrustManager[] { new X509TrustManager() {
public java.security.cert.X509Certificate[] getAcceptedIssuers() {
return null;
}
public void checkClientTrusted(X509Certificate[] certs, String authType) {
}
public void checkServerTrusted(X509Certificate[] certs, String authType) {
}
}};
// Install the all-trusting trust manager
SSLContext sc = SSLContext.getInstance("SSL");
sc.init(null, trustAllCerts, new java.security.SecureRandom());
HttpsURLConnection.setDefaultSSLSocketFactory(sc.getSocketFactory());
// Create all-trusting host name verifier
HostnameVerifier allHostsValid = new HostnameVerifier() {
public boolean verify(String hostname, SSLSession session) {
return true;
}
};
// Install the all-trusting host verifier
HttpsURLConnection.setDefaultHostnameVerifier(allHostsValid);
} catch (NoSuchAlgorithmException e) {
e.printStackTrace();
} catch (KeyManagementException e) {
e.printStackTrace();
}
}
// 主入口函数
public static void main(String args[]){
spider_proxy(); // 通过API实时获取米扑代理
proxy_no_auth(); // 代理无用户名密码授权
proxy_auth(); // 代理需要用户名密码授权
}
// 通过API实时获取米扑代理
public static void spider_proxy() {
String proxy_url = "https://proxyapi.mimvp.com/api/fetchsecret?orderid=868435221231212345&http_type=3";
URL url = null;
try {
url = new URL(proxy_url);
URLConnection conn = url.openConnection();
conn.setDoOutput(true);
conn.setReadTimeout(30 * 1000); // 设置超时30秒
conn.setConnectTimeout(30 * 1000); // 连接超时30秒
InputStream in = conn.getInputStream();
InputStreamReader reader = new InputStreamReader(in);
char[] ch = new char[1024];
int len = 0;
String data = "";
while((len = reader.read(ch)) > 0) {
String newData = new String(ch, 0, len);
data += newData;
}
String[] proxy_list;
proxy_list = data.split("\n");
for (String proxy : proxy_list) {
System.out.println(proxy);
}
in.close();
} catch (Exception e) {
e.printStackTrace();
}
}
// 使用代理,无用户名密码授权
public static void proxy_no_auth() {
int dataLen = 0;
// proxy protocol 只支持 http、socks5
dataLen = proxy_protocol("http", MimvpProxyJava.proxyMap.get("http")); // http
System.out.println("http : " + MimvpProxyJava.proxyMap.get("http") + " --> " + dataLen);
dataLen = proxy_protocol("socks5", MimvpProxyJava.proxyMap.get("socks5")); // socks5
System.out.println("socks5 : " + MimvpProxyJava.proxyMap.get("socks5") + " --> " + dataLen);
// proxy property 支持http、https、socks4、socks5
Iterator it2 = MimvpProxyJava.proxyMap.keySet().iterator();
while(it2.hasNext()){
String proxyType = it2.next();
String proxyIpPort = MimvpProxyJava.proxyMap.get(proxyType);
dataLen = proxy_property(proxyType, proxyIpPort);
System.out.println(proxyType + " : " + proxyIpPort + " --> " + dataLen);
}
// proxy socks
Iterator it3 = MimvpProxyJava.proxyMap.keySet().iterator();
while(it3.hasNext()){
String proxyType = it3.next();
String proxyIpPort = MimvpProxyJava.proxyMap.get(proxyType);
dataLen = proxy_socks(proxyType, proxyIpPort);
System.out.println(proxyType + " : " + proxyIpPort + " --> " + dataLen);
}
}
// 使用代理,需要用户名密码授权,请先取消授权的注释(代码里有注释说明)
public static void proxy_auth() {
int dataLen = 0;
// proxy protocol 只支持 http、socks5
dataLen = proxy_protocol("http", MimvpProxyJava.proxyAuthMap.get("http")); // http
System.out.println("http : " + MimvpProxyJava.proxyAuthMap.get("http") + " --> " + dataLen);
dataLen = proxy_protocol("socks5", MimvpProxyJava.proxyAuthMap.get("socks5")); // socks5
System.out.println("socks5 : " + MimvpProxyJava.proxyAuthMap.get("socks5") + " --> " + dataLen);
// proxy property 支持http、https、socks4、socks5
Iterator it2 = MimvpProxyJava.proxyAuthMap.keySet().iterator();
while(it2.hasNext()){
String proxyType = it2.next();
String proxyIpPort = MimvpProxyJava.proxyAuthMap.get(proxyType);
dataLen = proxy_property(proxyType, proxyIpPort);
System.out.println(proxyType + " : " + proxyIpPort + " --> " + dataLen);
}
}
// 设置系统代理,支持全部协议 http,https,socks4,socks5
private static int proxy_property(String proxyType, String proxyIpPort) {
int dataLen = 0;
String proxy_ip = proxyIpPort.split(":")[0];
String proxy_port = proxyIpPort.split(":")[1];
Properties prop = System.getProperties();
// http
if(proxyType.equals("http")){
prop.setProperty("http.proxySet", "true");
prop.setProperty("http.proxyHost", proxy_ip);
prop.setProperty("http.proxyPort", proxy_port);
prop.setProperty("http.nonProxyHosts", "localhost|192.168.0.*");
}
// https
if (proxyType.equals("https")) {
prop.setProperty("https.proxyHost", proxy_ip);
prop.setProperty("https.proxyPort", proxy_port);
}
// socks
if(proxyType.equals("socks4") || proxyType.equals("socks5")){
prop.setProperty("socksProxySet", "true");
prop.setProperty("socksProxyHost", proxy_ip);
prop.setProperty("socksProxyPort", proxy_port);
}
// ftp
if(proxyType.equals("ftp")){
prop.setProperty("ftp.proxyHost", proxy_ip);
prop.setProperty("ftp.proxyPort", proxy_port);
prop.setProperty("ftp.nonProxyHosts", "localhost|192.168.0.*");
}
// // auth 代理需要用户名和密码授权时开启,取消此注释,米扑代理验证通过
// Authenticator.setDefault(new MyAuthenticator(MimvpProxyJava.PROXY_USERNAME, MimvpProxyJava.PROXY_PASSWORD));
try{
URL url = new URL(proxyUrl); // 默认访问http网页
if(proxyType.equals("https")) { // 若为https协议,则访问https网页
url = new URL(proxyUrl2);
}
URLConnection conn = url.openConnection();
conn.setConnectTimeout(30 * 1000);
InputStream in = conn.getInputStream();
InputStreamReader reader = new InputStreamReader(in);
char[] ch = new char[1024];
int len = 0;
String data = "";
while((len = reader.read(ch)) > 0) {
String newData = new String(ch, 0, len);
data += newData;
}
in.close();
System.out.println("data : " + data);
dataLen = data.length();
} catch(Exception e) {
e.printStackTrace();
}
prop.clear(); // 清空属性参数,防止下次设置代理时残留脏数据
prop = null;
return dataLen;
}
static class MyAuthenticator extends Authenticator {
private String username = "";
private String password = "";
public MyAuthenticator(String username, String password) {
this.username = username;
this.password = password;
}
protected PasswordAuthentication getPasswordAuthentication() {
return new PasswordAuthentication(this.username, this.password.toCharArray());
}
}
// 使用函数协议,仅支持 HTTP 和 SOCKS5
private static int proxy_protocol(String proxyType, String proxyIpPort) {
int dataLen = 0;
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
try{
URL url = new URL(proxyUrl); // http://proxy.mimvp.com
// // auth 代理需要用户名密码授权时开启,取消此注释,米扑代理验证通过
// Authenticator.setDefault(new MyAuthenticator(MimvpProxyJava.PROXY_USERNAME, MimvpProxyJava.PROXY_PASSWORD));
InetSocketAddress addr = new InetSocketAddress(proxy_ip, proxy_port);
Proxy proxy = new Proxy(Proxy.Type.HTTP, addr);
if(proxyType.equals("socks4") || proxyType.equals("socks5")) {
proxy = new Proxy(Proxy.Type.SOCKS, addr);
}
URLConnection conn = url.openConnection(proxy);
conn.setConnectTimeout(30 * 1000); // 连接超时30秒
InputStream in = conn.getInputStream();
InputStreamReader reader = new InputStreamReader(in);
char[] ch = new char[1024];
int len = 0;
String data = "";
while((len = reader.read(ch)) > 0) {
String newData = new String(ch, 0, len);
data += newData;
}
in.close();
System.out.println("data : " + data);
dataLen = data.length();
} catch(Exception e) {
e.printStackTrace();
}
return dataLen;
}
// proxy socket,测试用
private static int proxy_socks(String proxyType, String proxyIpPort) {
int dataLen = 0;
Socket socket = null;
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
try {
socket = new Socket(proxy_ip, proxy_port);
byte[] ch = new String("GET http://www.mimvp.com/ HTTP/1.1\r\n\r\n").getBytes();
socket.getOutputStream().write(ch);
socket.setSoTimeout(30 * 1000);
byte[] bt = new byte[1024];
InputStream in = socket.getInputStream();
int len = 0;
String data = "";
while((len = in.read(bt)) > 0) {
String newData = new String(bt, 0, len);
data += newData;
}
System.out.println("data : " + data);
dataLen = data.length();
}catch(Exception e) {
e.printStackTrace();
} finally{
try {
if(socket != null){
socket.close();
}
} catch (IOException e) {
e.printStackTrace();
}
socket = null;
}
return dataLen;
}
// 启动运用通过JVM参数走代理, 注意这种代理是全局的,设置以后全部会自动走代理,
// 如果需要单个请求走代理(在走代理失败的话, 会自动尝试本地直接访问), 请使用proxy_protocol()协议代理函数
// -DproxySet=true
// -Dhttp.proxyHost=proxyIp
// -Dhttp.proxyPort=proxyPort
}
//////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////// Java HttpClient 示例 ///////////////////////////////////////
//////////////////////////////////////////////////////////////////////////////////////////////////
/**
* Java HttpClient 支持 http、https、socks5
*
* HttpClient不是Java JDK默认的类,因此需要添加jar包
* HttpClient jar包下载:http://hc.apache.org/downloads.cgi
*
* 米扑代理示例用的引用jar包如下:
* commons-codec-1.10.jar
* commons-logging-1.2.jar
* httpclient-4.5.5.jar
* httpcore-4.4.9.jar
*
* 米扑代理,已开源此示例和依赖jar包,上传到了Gitub:
* https://github.com/mimvp/mimvp-proxy-demo
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2015-11-09
*/
package com.mimvp;
import java.io.IOException;
import java.net.Authenticator;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.PasswordAuthentication;
import java.net.Proxy;
import java.net.Socket;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.HashMap;
import javax.net.ssl.SSLContext;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.HttpStatus;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.DnsResolver;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.BasicCredentialsProvider;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.impl.conn.DefaultProxyRoutePlanner;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.apache.http.protocol.HttpContext;
import org.apache.http.ssl.SSLContexts;
import org.apache.http.util.EntityUtils;
public class MimvpProxyJava2 {
@SuppressWarnings({ "serial" })
public static HashMap proxyMap = new HashMap() {
{
put("http", "138.68.161.14:3128");
put("https", "39.137.37.8:80");
put("socks4", "222.83.247.90:1080");
put("socks5", "111.207.55.16:1080");
}
};
public static String PROXY_USERNAME = "username";
public static String PROXY_PASSWORD = "password";
@SuppressWarnings({ "serial" })
public static HashMap proxyAuthMap = new HashMap() {
{
put("http", "120.24.177.37:4821");
put("https", "120.24.177.37:4821");
put("socks5", "120.24.177.37:4825");
}
};
final static String proxyUrl = "http://proxy.mimvp.com/test_proxy2"; // http
final static String proxyUrl2 = "https://proxy.mimvp.com/test_proxy2"; // https
// 主入口函数
public static void main(String args[]){
// 通过API实时获取米扑代理
spider_proxy();
// 代理无用户名密码授权
proxy_no_auth("http", MimvpProxyJava2.proxyMap.get("http"), proxyUrl); // http(proxy)
proxy_no_auth("https", MimvpProxyJava2.proxyMap.get("https"), proxyUrl2); // https(proxy)
proxy_no_auth2("http", MimvpProxyJava2.proxyMap.get("http"), proxyUrl); // http(config)
proxy_no_auth3("http", MimvpProxyJava2.proxyMap.get("http"), proxyUrl); // http(routePlanner)
proxy_no_auth_socks("socks5", MimvpProxyJava2.proxyMap.get("socks5"), proxyUrl); // socks5支持访问http网址
proxy_no_auth_socks("socks5", MimvpProxyJava2.proxyMap.get("socks5"), proxyUrl2); // socks5支持访问https网址
// 代理需要用户名密码授权
proxy_auth("http", MimvpProxyJava2.proxyAuthMap.get("http"), proxyUrl); // http
proxy_auth("https", MimvpProxyJava2.proxyAuthMap.get("https"), proxyUrl2); // https
proxy_auth_socks("socks5", MimvpProxyJava2.proxyAuthMap.get("socks5"), proxyUrl); // socks5支持访问http网址
proxy_auth_socks("socks5", MimvpProxyJava2.proxyAuthMap.get("socks5"), proxyUrl2); // socks5支持访问https网址
}
// 通过API实时获取米扑代理
public static void spider_proxy() {
String proxy_url = "https://proxyapi.mimvp.com/api/fetchsecret?orderid=868435221231212345&http_type=3";
try {
@SuppressWarnings({ "resource", "deprecation" })
HttpClient client = new DefaultHttpClient(); // 舍弃的用法
CloseableHttpClient client2 = HttpClients.createDefault(); // 推荐的用法
HttpGet request = new HttpGet(proxy_url);
HttpResponse response = client2.execute(request);
if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
String result = EntityUtils.toString(response.getEntity());
String[] proxy_list;
proxy_list = result.split("\n");
for (String proxy : proxy_list) {
System.out.println(proxy);
}
}
client2.close();
} catch (Exception e) {
System.out.println(e.toString());
}
}
// ############################## 代理无授权 ##############################
// 方法1:无密代理, 支持 http、https(proxy)
public static void proxy_no_auth(String proxyType, String proxyIpPort, String webUrl) {
System.out.println(proxyType + " , " + proxyIpPort + " , " + webUrl);
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
try {
CloseableHttpClient client = HttpClients.createDefault();
HttpHost proxy = new HttpHost(proxy_ip, proxy_port);
HttpGet request = new HttpGet(webUrl);
request.addHeader("Host","proxy.mimvp.com");
request.addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36");
HttpResponse response = client.execute(proxy, request);
if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
String result = EntityUtils.toString(response.getEntity());
System.out.println(result);
}
client.close();
} catch (Exception e) {
System.out.println(e.toString());
}
}
// 方法2:无密代理, 支持 http(config)
public static void proxy_no_auth2(String proxyType, String proxyIpPort, String webUrl) {
System.out.println(proxyType + " , " + proxyIpPort + " , " + webUrl);
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
try {
HttpClientBuilder builder = HttpClientBuilder.create();
CloseableHttpClient client = builder.build();
URL url = new URL(webUrl);
HttpHost target = new HttpHost(url.getHost(), url.getDefaultPort(), url.getProtocol());
if(proxyType.equals("https")) {
target = new HttpHost(url.getHost(), 443, "https");
}
HttpHost proxy = new HttpHost(proxy_ip, proxy_port);
RequestConfig config = RequestConfig.custom().setProxy(proxy).build();
HttpGet request = new HttpGet(url.getPath());
request.setConfig(config);
request.addHeader("Host","proxy.mimvp.com");
request.addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36");
HttpResponse response = client.execute(target, request);
if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
String result = EntityUtils.toString(response.getEntity());
System.out.println(result);
}
client.close();
} catch (Exception e) {
System.out.println(e.toString());
}
}
// 方法3:无密代理, 支持 http(routePlanner)
public static void proxy_no_auth3(String proxyType, String proxyIpPort, String webUrl) {
System.out.println(proxyType + " , " + proxyIpPort + " , " + webUrl);
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
try {
HttpHost proxy = new HttpHost(proxy_ip, proxy_port);
DefaultProxyRoutePlanner routePlanner = new DefaultProxyRoutePlanner(proxy);
CloseableHttpClient client = HttpClients.custom().setRoutePlanner(routePlanner).build();
HttpGet request = new HttpGet(webUrl);
request.addHeader("Host","proxy.mimvp.com");
request.addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36");
HttpResponse response = client.execute(request);
if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
String result = EntityUtils.toString(response.getEntity());
System.out.println(result);
}
client.close();
} catch (Exception e) {
System.out.println(e.toString());
}
}
// ############################## 代理用户名密码授权 ##############################
// HttpClient 支持socks5代理的自定义类
static class MyConnectionSocketFactory extends PlainConnectionSocketFactory {
@Override
public Socket createSocket(final HttpContext context) throws IOException {
InetSocketAddress socksaddr = (InetSocketAddress) context.getAttribute("socks.address");
Proxy proxy = new Proxy(Proxy.Type.SOCKS, socksaddr);
return new Socket(proxy);
}
@Override
public Socket connectSocket(int connectTimeout, Socket socket, HttpHost host, InetSocketAddress remoteAddress,
InetSocketAddress localAddress, HttpContext context) throws IOException {
InetSocketAddress unresolvedRemote = InetSocketAddress.createUnresolved(host.getHostName(), remoteAddress.getPort());
return super.connectSocket(connectTimeout, socket, host, unresolvedRemote, localAddress, context);
}
}
static class MySSLConnectionSocketFactory extends SSLConnectionSocketFactory {
public MySSLConnectionSocketFactory(final SSLContext sslContext) {
// super(sslContext, ALLOW_ALL_HOSTNAME_VERIFIER);
super(sslContext);
}
@Override
public Socket createSocket(final HttpContext context) throws IOException {
InetSocketAddress socksaddr = (InetSocketAddress) context.getAttribute("socks.address");
Proxy proxy = new Proxy(Proxy.Type.SOCKS, socksaddr);
return new Socket(proxy);
}
@Override
public Socket connectSocket(int connectTimeout, Socket socket, HttpHost host, InetSocketAddress remoteAddress,
InetSocketAddress localAddress, HttpContext context) throws IOException {
InetSocketAddress unresolvedRemote = InetSocketAddress.createUnresolved(host.getHostName(), remoteAddress.getPort());
return super.connectSocket(connectTimeout, socket, host, unresolvedRemote, localAddress, context);
}
}
static class FakeDnsResolver implements DnsResolver {
@Override
public InetAddress[] resolve(String host) throws UnknownHostException {
// Return some fake DNS record for every request, we won't be using it
return new InetAddress[] { InetAddress.getByAddress(new byte[] { 1, 1, 1, 1 }) };
}
}
// 无密代理, 支持 socks5
public static void proxy_no_auth_socks(String proxyType, String proxyIpPort, String webUrl) {
System.out.println(proxyType + " , " + proxyIpPort + " , " + webUrl);
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
try {
Registry reg = RegistryBuilder. create()
.register("http", new MyConnectionSocketFactory())
.register("https", new MySSLConnectionSocketFactory(SSLContexts.createSystemDefault()))
.build();
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(reg, new FakeDnsResolver());
CloseableHttpClient client = HttpClients.custom().setConnectionManager(cm).build();
InetSocketAddress addr = new InetSocketAddress(proxy_ip, proxy_port);
HttpClientContext context = HttpClientContext.create();
context.setAttribute("socks.address", addr);
HttpGet request = new HttpGet(webUrl);
request.addHeader("Host","proxy.mimvp.com");
request.addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36");
HttpResponse response = client.execute(request, context);
if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
String result = EntityUtils.toString(response.getEntity());
System.out.println(result);
}
} catch (Exception e) {
System.out.println(e.toString());
}
}
// 有密代理,需要用户名密码授权,请先取消授权的注释(代码里有注释说明)
public static void proxy_auth(String proxyType, String proxyIpPort, String webUrl) {
System.out.println(proxyType + " , " + proxyIpPort + " , " + webUrl);
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
CredentialsProvider provider = new BasicCredentialsProvider();
provider.setCredentials( new AuthScope(proxy_ip, proxy_port),
new UsernamePasswordCredentials(MimvpProxyJava2.PROXY_USERNAME, MimvpProxyJava2.PROXY_PASSWORD));
CloseableHttpClient client = HttpClients.custom().setDefaultCredentialsProvider(provider).build();
try {
URL url = new URL(webUrl);
HttpHost target = new HttpHost(url.getHost(),url.getDefaultPort(),url.getProtocol());
HttpHost proxy = new HttpHost(proxy_ip, proxy_port);
RequestConfig config = RequestConfig.custom().setProxy(proxy).build();
HttpGet request = new HttpGet(url.getPath());
request.setConfig(config);
request.addHeader("Host","proxy.mimvp.com");
request.addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36");
HttpResponse response = client.execute(target, request);
if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
String result = EntityUtils.toString(response.getEntity());
System.out.println(result);
}
} catch (Exception e) {
e.printStackTrace();
}
}
// socks5代理的用户名密码授权
static class MyAuthenticator extends Authenticator {
private String username = "";
private String password = "";
public MyAuthenticator(String username, String password) {
this.username = username;
this.password = password;
}
protected PasswordAuthentication getPasswordAuthentication() {
return new PasswordAuthentication(this.username, this.password.toCharArray());
}
}
// 有密代理, 支持 socks5
public static void proxy_auth_socks(String proxyType, String proxyIpPort, String webUrl) {
System.out.println(proxyType + " , " + proxyIpPort + " , " + webUrl);
String proxy_ip = proxyIpPort.split(":")[0];
int proxy_port = Integer.parseInt(proxyIpPort.split(":")[1]);
try {
Registry reg = RegistryBuilder. create()
.register("http", new MyConnectionSocketFactory())
.register("https", new MySSLConnectionSocketFactory(SSLContexts.createSystemDefault()))
.build();
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(reg, new FakeDnsResolver());
CloseableHttpClient client = HttpClients.custom().setConnectionManager(cm).build();
// auth 代理需要用户名密码授权时开启,取消此注释,米扑代理验证通过
Authenticator.setDefault(new MyAuthenticator(MimvpProxyJava2.PROXY_USERNAME, MimvpProxyJava2.PROXY_PASSWORD));
InetSocketAddress addr = new InetSocketAddress(proxy_ip, proxy_port);
HttpClientContext context = HttpClientContext.create();
context.setAttribute("socks.address", addr);
HttpGet request = new HttpGet(webUrl);
request.addHeader("Host","proxy.mimvp.com");
request.addHeader("User-Agent","Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36");
HttpResponse response = client.execute(request, context);
if(response.getStatusLine().getStatusCode() == HttpStatus.SC_OK) {
String result = EntityUtils.toString(response.getEntity());
System.out.println(result);
}
} catch (Exception e) {
System.out.println(e.toString());
}
}
}
NodeJS 示例
/**
* NodeJS 支持 http、https、socks4、socks5
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2017.5.25
*
* 米扑博客示例详解 :
* https://blog.mimvp.com/article/18812.html
*
* proxy-agent : https://www.npmjs.com/package/proxy-agent
* superagent-proxy : https://www.npmjs.com/package/superagent-proxy
* http-proxy-agent : https://www.npmjs.com/package/http-proxy-agent
* https-proxy-agent : https://www.npmjs.com/package/https-proxy-agent
* socks-proxy-agent : https://www.npmjs.com/package/socks-proxy-agent
*/
var proxy_http = "http://35.154.138.213:80";
var proxy_https = "http://103.247.154.105:8080";
var proxy_socks4 = "socks4://202.159.8.243:1080";
var proxy_socks5 = "socks5://94.177.216.47:2016";
var mimvp_url = "http://proxy.mimvp.com/exist";
var mimvp_url2 = "https://proxy.mimvp.com/exist";
var mimvp_url3 = "https://apps.bdimg.com/libs/jquery-i18n/1.1.1/jquery.i18n.min.js";
//http for http
function check_http(proxy_uri, mimvp_url) {
var http = require('http');
var url = require('url');
console.log("proxy_uri : " + proxy_uri);
var proxy_ip = proxy_uri.toString().split("://")[1].split(":")[0]; // IP
var proxy_port = proxy_uri.toString().split("://")[1].split(":")[1]; // Port
const options = {
host : proxy_ip,
port : proxy_port,
path : mimvp_url,
method : 'GET',
headers : {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
"Host": "apps.bdimg.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
}
};
http
.request(options, function(res) {
console.log("got response code: " + res.statusCode);
if (res.statusCode == 200) {
res.setEncoding("utf-8");
var resData = "";
res.on("data", function(chunk) {
resData += chunk;
}).on("end", function() {
console.log("got response text: " + resData);
});
} else {
console.log("res.statusCode err " + res.statusCode); // 请求失败
}
})
.on("error", function(err) {
console.log(err);
})
.end();
};
// request http, https
function check_request(proxy_uri, mimvp_url) {
var request = require('request');
console.log("proxy_uri : " + proxy_uri);
const proxiedRequest = request.defaults({'proxy': proxy_uri});
const options = {
url : mimvp_url,
headers : {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
"Host": "apps.bdimg.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
}
};
proxiedRequest
.get(options, function (err, res, body) {
console.log("got response: " + res.statusCode);
console.log("got body: " + body);
})
.on("error", function (err) {
console.log(err);
});
};
// superagent-proxy http, https
function check_superagent(proxy_uri, mimvp_url) {
var superagent = require('superagent');
require('superagent-proxy')(superagent);
console.log("proxy_uri : " + proxy_uri);
superagent
.get(mimvp_url)
.proxy(proxy_uri)
.buffer(true)
.set('User-Agent','Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36')
.set('Host','apps.bdimg.com')
.set('Accept','text/html,application/xhtml+xml,application/xml,application/x-javascript;q=0.9,image/webp,image/apng,*/*;q=0.8')
.set('Accept-Encoding','gzip, deflate, br')
.end(function onResponse(err, res) {
if (err) {
console.log(err);
} else {
console.log(res.status, res.headers);
console.log(res.text);
}
});
};
// https-proxy-agent https
function check_httpsproxyagent(proxy_uri, mimvp_url) {
var url = require('url');
var https = require('https');
var HttpsProxyAgent = require('https-proxy-agent');
console.log('using proxy server %j', proxy_uri); // 'https://79.137.80.210:3128'
var agent = new HttpsProxyAgent(proxy_uri);
var endpoint = process.argv[2] || mimvp_url;
var options = url.parse(endpoint);
options.agent = agent;
options.port = 443;
options.secureProxy = true;
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
"Host": "apps.bdimg.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
};
options.headers = headers;
console.log('options %j', options);
https.get(options, function (res) {
console.log('"response" event!', res.headers);
res.pipe(process.stdout);
});
}
//socks-proxy-agent socks4, socks5
function check_socksproxyagent(proxy_uri, mimvp_url) {
var url = require('url');
var http = require('http');
var SocksProxyAgent = require('socks-proxy-agent');
console.log('using proxy server %j', proxy_uri); // 'socks://94.177.216.47:2016'
var agent = new SocksProxyAgent(proxy_uri);
var endpoint = process.argv[2] || mimvp_url;
var options = url.parse(endpoint);
options.agent = agent;
options.port = 443;
options.secureProxy = true;
headers = {
"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
"Host": "apps.bdimg.com",
"Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
"Accept-Encoding": "gzip, deflate, br",
};
options.headers = headers;
console.log('options %j', options);
http.get(options, function (res) {
console.log('"response" event!', res.headers);
res.pipe(process.stdout);
});
}
// http
check_http(proxy_http, mimvp_url); // http
// request
check_request(proxy_http, mimvp_url); // http
check_request(proxy_https, mimvp_url3); // https
// superagent
check_superagent(proxy_http, mimvp_url); // http
check_superagent(proxy_https, mimvp_url3); // https
// https_proxy_agent
check_httpsproxyagent(proxy_https, mimvp_url3); // https
// socks_proxy_agent
check_socksproxyagent(proxy_socks4, mimvp_url); // socks4
check_socksproxyagent(proxy_socks5, mimvp_url); // socks5
// 执行命令:
// cd /usr/local/node/lib/node_modules/npm/
// sudo cp ~/Documents/workspace/MimvpProxyDemo/NodeJS/mimvp-proxy-nodejs.js .
// sudo node mimvp-proxy-nodejs.js
PhantomJS 示例
/**
* PhantomJS 支持 http、socks5
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2017.5.22
*/
// http代理格式 : phantomjs --proxy-type=http --proxy=IP:Port
// socks代理格式 : phantomjs --proxy-type=socks5 --proxy=IP:Port
/**
$ phantomjs -h
--ignore-ssl-errors= Ignores SSL errors (expired/self-signed certificate errors): 'true' or 'false' (default)
--proxy= Sets the proxy server, e.g. '--proxy=http://proxy.company.com:8080'
--proxy-auth= Provides authentication information for the proxy, e.g. ''-proxy-auth=username:password'
--proxy-type= Specifies the proxy type, 'http' (default), 'none' (disable completely), or 'socks5'
--script-encoding= Sets the encoding used for the starting script, default is 'utf8'
--web-security= Enables web security, 'true' (default) or 'false'
--ssl-protocol= Selects a specific SSL protocol version to offer.
--ssl-ciphers= Sets supported TLS/SSL ciphers.
--ssl-certificates-path= Sets the location for custom CA certificates
--ssl-client-certificate-file= Sets the location of a client certificate
--ssl-client-key-file= Sets the location of a clients' private key
--ssl-client-key-passphrase= Sets the passphrase for the clients' private key
*/
var mimvp_url = "http://proxy.mimvp.com/exist";
var mimvp_url2 = "https://proxy.mimvp.com/exist";
var mimvp_url3 = "https://apps.bdimg.com/libs/jquery-i18n/1.1.1/jquery.i18n.min.js";
var page = require('webpage').create();
page.settings.userAgent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36';
page.open(mimvp_url, {}, function(status) { // http
//page.open(mimvp_url2, {}, function(status) { // socks5
console.log('status : ' + status);
if (status == "success") {
console.log("set proxy success");
console.log('page.content : ' + page.content);
console.log('page.content length : ' + page.content.length);
} else {
console.log("set proxy fail");
}
phantom.exit();
});
// 执行命令:
// http : phantomjs --proxy-type=http --proxy=23.94.65.132:1080 mimvp-proxy-phantomjs.js
// socks5 : phantomjs --proxy-type=socks5 --proxy=78.63.194.59:35844 --ignore-ssl-errors=true mimvp-proxy-phantomjs.js
Perl 示例
#!/usr/bin/env perl
#
# Perl 支持 http、https
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2017-03-28
use CGI;
use strict;
use LWP::UserAgent;
our %proxy_http = ("http", "http://138.68.165.154:3128");
our %proxy_https = ("https", "https://113.106.94.213:80");
our %proxy_connect = ("https", "connect://173.233.55.118:443");
our $mimvp_url = "http://proxy.mimvp.com/exist";
our $mimvp_url2 = "https://proxy.mimvp.com/exist";
## http
sub test_http {
my ($url, %proxy) = @_;
print "proxy : $proxy{'http'}\n";
print "https : $proxy{'https'}\n";
print "url : $url\n";
my $browser = LWP::UserAgent->new(ssl_opts => { verify_hostname => 0 });
$browser->env_proxy();
# # 设置的代理格式
# $browser->proxy('http', 'http://138.68.165.154:3128');
# $browser->proxy(['http','ftp'], 'http://138.68.165.154:3128');
$browser->proxy(%proxy);
$browser->timeout(30);
$browser->agent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36');
my $response = $browser->get($url); # 爬取的网址
my $is_success = $response->is_success(); # 1
my $content_type = $response->content_type(); # text/html
my $content = $response->content(); # 网页正文
my $content_length = length($content); # 网页正文长度
print "$is_success\n";
print "$content_type\n";
print "$content_length\n";
print "$content\n";
}
## https (NO Success)
## error info : LWP::Protocol::https::Socket: SSL connect attempt failed because of handshake problems SSL wants a read first at /System/Library/Perl/Extras/5.18/LWP/Protocol/http.pm line 51.
sub test_https {
my ($url, %proxy) = @_;
print "proxy : $proxy{'http'}\n";
print "https : $proxy{'https'}\n";
print "url : $url\n";
BEGIN {
$ENV{HTTPS_PROXY} = 'https://173.233.55.118:443';
# $ENV{HTTPS_PROXY_USERNAME} = ;
# $ENV{HTTPS_PROXY_PASSWORD} = ;
$ENV{HTTPS_DEBUG} = 1;
$ENV{PERL_LWP_SSL_VERIFY_HOSTNAME} = 0;
}
my $browser = LWP::UserAgent->new(ssl_opts => { verify_hostname => 0 });
$browser->env_proxy();
# # 设置的代理格式
# $browser->proxy(%proxy); # NO USE
$browser->timeout(30);
$browser->agent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36');
my $req = new HTTP::Request('GET', $url); # 爬取的网址
my $response = $browser->request($req);
my $is_success = $response->is_success(); # 1
my $content_type = $response->content_type(); # text/html
my $content = $response->content(); # 网页正文
my $content_length = length($content); # 网页正文长度
print "$is_success\n";
print "$content_type\n";
print "$content_length\n";
print "$content\n";
}
## https (connect)
## 1. download LWP-Protocol-connect (wget http://search.cpan.org/CPAN/authors/id/B/BE/BENNING/LWP-Protocol-connect-6.09.tar.gz)
## 2. tar zxvf LWP-Protocol-connect-6.09.tar.gz
## cd LWP-Protocol-connect-6.09
## perl Makefile.PL
## make
## sudo make install
sub test_connect {
my ($url, %proxy) = @_;
print "proxy : $proxy{'http'}\n";
print "https : $proxy{'https'}\n";
print "url : $url\n";
my $browser = LWP::UserAgent->new();
$browser->env_proxy();
# # 设置的代理格式
# $browser->proxy('https', 'connect://173.233.55.118:443');
$browser->proxy(%proxy);
$browser->timeout(30);
$browser->agent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36');
# my $req = new HTTP::Request('GET', $url);
# my $response = $browser->request($req);
my $response = $browser->get($url); # 爬取的网址
my $is_success = $response->is_success(); # 1
my $content_type = $response->content_type(); # text/html
my $content = $response->content(); # 网页正文
my $content_length = length($content); # 网页正文长度
print "$is_success\n";
print "$content_type\n";
print "$content_length\n";
print "$content\n";
}
test_http($mimvp_url, %proxy_http); # http
test_https($mimvp_url2, %proxy_https); # https (NO Success)
test_connect($mimvp_url2, %proxy_connect); # https (connect)
## 执行命令
## perl -d mimvp-proxy-perl.pl
PHP 示例
/**
* PHP 支持 http、https、socks4、socks5、tcp
*
* curl不是php原生库,需要安装库:
* Ubuntu系统:sudo apt-get -y install php5-curl
* CentOS系统:yum -y install php-curl
* MacOS 系统:brew install php-curl
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2017.5.25
*
* CURLOPT_PROXYTYPE :
* CURLPROXY_HTTP, CURLPROXY_HTTP_1_0
* CURLPROXY_SOCKS4, CURLPROXY_SOCKS4A
* CURLPROXY_SOCKS5, CURLPROXY_SOCKS5_HOSTNAME
*/
$mimvp_url = "http://proxy.mimvp.com/test_proxy2"; // http
$mimvp_url2 = "https://proxy.mimvp.com/test_proxy2"; // https
# 实时爬取米扑代理API接口 (curl)
function spider_proxy_by_curl() {
$proxy_url = "https://proxyapi.mimvp.com/api/fetchsecret?orderid=868435225671251234&http_type=3";
$ch = curl_init ();
curl_setopt($ch, CURLOPT_URL, $proxy_url);
curl_setopt($ch, CURLOPT_SSL_VERIFYPEER, FALSE);
curl_setopt($ch, CURLOPT_SSL_VERIFYHOST, FALSE);
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 30);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
curl_setopt($ch, CURLOPT_HEADER, false); // 不返回头信息
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); // 返回网页内容
$result = curl_exec($ch);
curl_close($ch);
var_dump($result);
$proxy_list = explode("\n", $result);
foreach ($proxy_list as $proxy) {
echo "
$proxy";
}
}
# 实时爬取米扑代理API接口 (stream)
function spider_proxy_by_stream() {
$proxy_url = "https://proxyapi.mimvp.com/api/fetchsecret?orderid=868435225671251234&http_type=3";
$header_array = array( "Accept-Language:zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4",
"Referer:http://proxy.mimvp.com/fetchopen",
"Host:proxy.mimvp.com",
"User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36");
$options = array("http" => array(
"header" => $header_array,
"method" => "GET")
);
$context = stream_context_create($options);
$result = file_get_contents($proxy_url, false, $context);
var_dump($result);
$proxy_list = explode("\n", $result);
foreach ($proxy_list as $proxy) {
echo "
$proxy";
}
}
######################### php curl 支持 http、https、socks4、socks5(代理无密码)#########################
$proxy_http = "http://118.89.57.38:8080";
$proxy_https = "https://39.134.108.92:8080";
$proxy_socks4 = "socks4://59.37.163.176:1080";
$proxy_socks5 = "socks5://183.156.71.161:1080";
function proxy_curl($proxy_uri, $mimvp_url) {
$proxy_type = explode('://', $proxy_uri)[0]; // http, https, socks4, socks5
$proxy_ip_port = explode('://', $proxy_uri)[1]; // ip:port
echo "proxy_uri: $proxy_uri ; proxy_type: $proxy_type , proxy_ip_port: $proxy_ip_port
";
$ch = curl_init ();
curl_setopt ( $ch, CURLOPT_URL, $mimvp_url);
curl_setopt ( $ch, CURLOPT_HTTPPROXYTUNNEL, false );
curl_setopt ( $ch, CURLOPT_PROXY, $proxy_ip_port );
if ($proxy_type == "http") {
curl_setopt ( $ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP ); // http
}
elseif ($proxy_type == "https") {
curl_setopt ( $ch, CURLOPT_SSL_VERIFYHOST, 2 );
curl_setopt ( $ch, CURLOPT_SSL_VERIFYPEER, false ); // https
}
elseif ($proxy_type == "socks4") {
curl_setopt ( $ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS4 ); // socks4
}
elseif ($proxy_type == "socks5") {
curl_setopt ( $ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5 ); // socks5
}
curl_setopt ( $ch, CURLOPT_TIMEOUT, 60 );
curl_setopt ( $ch, CURLOPT_CONNECTTIMEOUT, 60 );
curl_setopt ( $ch, CURLOPT_HEADER, false );
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, true ); // 返回网页内容
$result = curl_exec ( $ch );
print_r($result); // 打印网页正文
$curl_info = curl_getinfo($ch); // 打印网页信息
var_dump($curl_info);
echo $curl_info['size_download'] . "
";
curl_close ( $ch );
}
######################### php curl 支持 http、https、socks5(代理有密码)#########################
$proxy_auth_http = "http://username:password@125.121.168.231:5140";
$proxy_auth_https = "https://username:password@125.121.168.231:5140";
$proxy_auth_socks5 = "socks5://username:password@125.121.168.231:5141";
function proxy_auth_curl($proxy_uri, $mimvp_url) {
$proxy_type = explode('://', $proxy_uri)[0]; // http, https, socks4, socks5
$proxy_ip_port = explode('://', $proxy_uri)[1]; // ip:port
echo "proxy_uri: $proxy_uri ; proxy_type: $proxy_type , proxy_ip_port: $proxy_ip_port
";
$ch = curl_init ();
curl_setopt ( $ch, CURLOPT_URL, $mimvp_url);
curl_setopt ( $ch, CURLOPT_HTTPPROXYTUNNEL, false );
curl_setopt ( $ch, CURLOPT_PROXY, $proxy_ip_port );
if ($proxy_type == "http") {
curl_setopt ( $ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP ); // http
}
elseif ($proxy_type == "https") {
curl_setopt ( $ch, CURLOPT_SSL_VERIFYHOST, 2 );
curl_setopt ( $ch, CURLOPT_SSL_VERIFYPEER, false ); // https
}
elseif ($proxy_type == "socks5") {
curl_setopt ( $ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5 ); // socks5
}
curl_setopt ( $ch, CURLOPT_TIMEOUT, 60 );
curl_setopt ( $ch, CURLOPT_CONNECTTIMEOUT, 60 );
curl_setopt ( $ch, CURLOPT_HEADER, false );
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, true ); // 返回网页内容
$result = curl_exec ( $ch );
print_r($result); // 打印网页正文
$curl_info = curl_getinfo($ch); // 打印网页信息
var_dump($curl_info);
echo $curl_info['size_download'] . "
";
curl_close ( $ch );
}
######################### php curl 支持 http、https、socks5(代理有密码)#########################
$proxy_noauth_http = "http://125.121.168.231:5140";
$proxy_noauth_https = "https://125.121.168.231:5140";
$proxy_noauth_socks5 = "socks5://125.121.168.231:5141";
$PROXY_USERNAME = 'username';
$PROXY_PASSEORD = 'password';
function proxy_noauth_curl($proxy_uri, $mimvp_url) {
global $PROXY_USERNAME;
global $PROXY_PASSEORD;
$proxy_type = explode('://', $proxy_uri)[0]; // http, https, socks4, socks5
$proxy_ip_port = explode('://', $proxy_uri)[1]; // ip:port
echo "proxy_uri: $proxy_uri ; proxy_type: $proxy_type , proxy_ip_port: $proxy_ip_port
";
$ch = curl_init ();
curl_setopt ( $ch, CURLOPT_URL, $mimvp_url);
curl_setopt ( $ch, CURLOPT_HTTPPROXYTUNNEL, false );
curl_setopt ( $ch, CURLOPT_PROXY, $proxy_ip_port );
# 设置代理授权
curl_setopt($ch, CURLOPT_PROXYAUTH, CURLAUTH_BASIC);
curl_setopt($ch, CURLOPT_PROXYUSERPWD, "{$PROXY_USERNAME}:{$PROXY_PASSEORD}");
if ($proxy_type == "http") {
curl_setopt ( $ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP ); // http
}
elseif ($proxy_type == "https") {
curl_setopt ( $ch, CURLOPT_SSL_VERIFYHOST, 2 );
curl_setopt ( $ch, CURLOPT_SSL_VERIFYPEER, false ); // https
}
elseif ($proxy_type == "socks5") {
curl_setopt ( $ch, CURLOPT_PROXYTYPE, CURLPROXY_SOCKS5 ); // socks5
}
curl_setopt ( $ch, CURLOPT_TIMEOUT, 60 );
curl_setopt ( $ch, CURLOPT_CONNECTTIMEOUT, 60 );
curl_setopt ( $ch, CURLOPT_HEADER, false );
curl_setopt ( $ch, CURLOPT_RETURNTRANSFER, true ); // 返回网页内容
$result = curl_exec ( $ch );
print_r($result); // 打印网页正文
$curl_info = curl_getinfo($ch); // 打印网页信息
var_dump($curl_info);
echo $curl_info['size_download'] . "
";
curl_close ( $ch );
}
######################### php stream 支持 tcp, 含http和https代理 #########################
$proxy_tcp = "tcp://118.89.57.38:8080"; // 支持http
$proxy_tcp = "tcp://39.134.108.92:8080"; // 支持https
function proxy_stream($proxy_uri, $mimvp_url) {
$proxy_type = explode('://', $proxy_uri)[0]; // http, https
$proxy_ip_port = explode('://', $proxy_uri)[1]; // ip:port
echo "proxy_uri: $proxy_uri ; proxy_type: $proxy_type , proxy_ip_port: $proxy_ip_port
";
$header_array = array( "Accept-Language:zh-CN,zh;q=0.8,en;q=0.6,ja;q=0.4",
"Referer:http://proxy.mimvp.com/fetchopen",
"Host:proxy.mimvp.com",
"User-Agent:Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36");
$options = array("http" => array(
"proxy" => $proxy_uri,
"header" => $header_array,
"method" => "GET")
);
$context = stream_context_create($options);
$result = file_get_contents($mimvp_url, false, $context);
print_r($result);
}
// 通过API获取米扑代理
spider_proxy_by_curl();
spider_proxy_by_stream();
// curl proxy no proxy
proxy_curl($proxy_http, $mimvp_url); // http
proxy_curl($proxy_https, $mimvp_url); // https
proxy_curl($proxy_socks4, $mimvp_url); // socks4
proxy_curl($proxy_socks5, $mimvp_url); // socks5
// 授权方法1: curl proxy proxy
proxy_auth_curl($proxy_auth_http, $mimvp_url); // http
proxy_auth_curl($proxy_auth_https, $mimvp_url); // https
proxy_auth_curl($proxy_auth_socks5, $mimvp_url); // socks5 访问http
proxy_auth_curl($proxy_auth_socks5, $mimvp_url2); // socks5 访问https
// 授权方法2: curl proxy proxy
proxy_noauth_curl($proxy_noauth_http, $mimvp_url); // http
proxy_noauth_curl($proxy_noauth_https, $mimvp_url); // https
proxy_noauth_curl($proxy_noauth_socks5, $mimvp_url); // socks5 访问http
proxy_noauth_curl($proxy_noauth_socks5, $mimvp_url2); // socks5 访问https
// stream
proxy_stream($proxy_tcp, $mimvp_url); // tcp http
proxy_stream($proxy_tcp, $mimvp_url2); // tcp https
Python2 示例
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Python2 支持 http、https、socks4、socks5
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2015-11-09
import urllib, urllib2
import base64
import socks, socket # 需要引入socks.py文件,请到米扑代理示例下载
import sys
reload(sys)
sys.setdefaultencoding('utf-8')
# 全局取消ssl证书验证,防止打开未验证的https网址抛出异常
# urllib2.URLError:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
mimvp_url = "http://proxy.mimvp.com/test_proxy2"
mimvp_url2 = "https://proxy.mimvp.com/test_proxy2"
# 爬取米扑代理API接口
def spider_proxy():
proxy_url = 'https://proxyapi.mimvp.com/api/fetchopen?orderid=863196322034111234'
req = urllib2.Request(proxy_url)
content = urllib2.urlopen(req, timeout=60).read()
proxy_list = content.split("\n")
for proxy in proxy_list:
print proxy
################### proxy no auth(代理无用户名密码验证)###################
proxy_http = {"http" : "http://111.199.144.207:8118"}
proxy_https = {"https" : "http://180.173.111.237:9797"}
proxy_socks4 = {'socks4' : '218.58.52.158:1088'}
proxy_socks5 = {'socks5' : '68.234.190.150:45454'}
# urllib2 支持 http
def test_http(proxy, mimvp_url):
handler = urllib2.ProxyHandler(proxy)
opener = urllib2.build_opener(handler, urllib2.HTTPHandler) # http
urllib2.install_opener(opener)
req = urllib2.Request(mimvp_url)
content = urllib2.urlopen(req, timeout=60).read()
print len(content), content
opener.close()
# urllib2 支持 https
def test_https(proxy, mimvp_url):
handler = urllib2.ProxyHandler(proxy)
opener = urllib2.build_opener(handler, urllib2.HTTPSHandler) # https
urllib2.install_opener(opener)
req = urllib2.Request(mimvp_url)
content = urllib2.urlopen(req, timeout=60).read()
print len(content), content
opener.close()
# urllib 支持 http, https
def test_http2(proxy, mimvp_url):
opener = urllib.FancyURLopener(proxy)
f = opener.open(mimvp_url)
content = f.read()
print len(content), content
f.close()
opener.close()
# socks4
def test_socks4(socks4, mimvp_url):
socks4_ip = socks4.split(":")[0]
socks4_port = int(socks4.split(":")[1])
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS4, socks4_ip, socks4_port)
socket.socket = socks.socksocket
content = urllib2.urlopen(mimvp_url, timeout=30).read()
print len(content), content
# socks5
def test_socks5(socks5, mimvp_url):
socks5_ip = socks5.split(":")[0]
socks5_port = int(socks5.split(":")[1])
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, socks5_ip, socks5_port)
socket.socket = socks.socksocket
content = urllib2.urlopen(mimvp_url, timeout=30).read()
print len(content), content
################### proxy auth(代理有用户名密码验证)###################
proxy_noauth_http = {"http" : "http://120.24.177.37:3363"}
proxy_noauth_https = {"https" : "http://120.24.177.37:3363"}
proxy_auth_http = {"http" : "http://username:password@120.24.177.37:3363"}
proxy_auth_https = {"https" : "http://username:password@120.24.177.37:3363"}
proxy_auth_socks5 = {'socks5' : '120.24.177.37:33634'}
# urllib2 支持 http
def test_auth_http(proxy, mimvp_url):
handler = urllib2.ProxyHandler(proxy)
opener = urllib2.build_opener(handler, urllib2.HTTPHandler) # http
urllib2.install_opener(opener)
req = urllib2.Request(mimvp_url)
content = urllib2.urlopen(req, timeout=60).read()
print len(content), content
opener.close()
# urllib2 支持 https
def test_auth_https(proxy, mimvp_url):
handler = urllib2.ProxyHandler(proxy)
opener = urllib2.build_opener(handler, urllib2.HTTPSHandler) # https
urllib2.install_opener(opener)
req = urllib2.Request(mimvp_url)
content = urllib2.urlopen(req, timeout=60).read()
print len(content), content
opener.close()
# urllib2 headers (注意:headers认证,只支持访问http网页,不支持访问https网页)
def test_auth_headers(proxy, mimvp_url):
PROXY_USERNAME = 'username'
PROXY_PASSWORD = 'password'
proxy_type = ""
proxy_ip_port = ""
for proxy_type in proxy:
proxy_value = proxy.get(proxy_type, '')
proxy_ip_port = proxy_value.split("://")[1]
req = urllib2.Request(mimvp_url)
req.add_header("Proxy-Authorization", "Basic %s" % base64.b64encode(b'%s:%s' % (PROXY_USERNAME, PROXY_PASSWORD)))
req.set_proxy(proxy_ip_port, proxy_type)
content = urllib2.urlopen(req, timeout=60).read()
print len(content), content
# urllib 支持 http, https
def test_auth_http2(proxy, mimvp_url):
opener = urllib.FancyURLopener(proxy)
f = opener.open(mimvp_url)
content = f.read()
print len(content), content
f.close()
opener.close()
# socks5
def test_auth_socks5(socks5, mimvp_url):
PROXY_USERNAME = 'username'
PROXY_PASSWORD = 'password'
socks5_ip = socks5.split(":")[0]
socks5_port = int(socks5.split(":")[1])
rdns = False #是否在代理服务器上进行dns查询
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, socks5_ip, socks5_port, rdns, PROXY_USERNAME, PROXY_PASSWORD)
socket.socket = socks.socksocket
content = urllib2.urlopen(mimvp_url, timeout=30).read()
print len(content), content
if __name__ == "__main__":
spider_proxy()
# proxy no auth
test_http(proxy_http, mimvp_url) # http
test_https(proxy_https, mimvp_url2) # https
test_http2(proxy_http, mimvp_url) # http
test_http2(proxy_https, mimvp_url2) # https
test_socks4(proxy_socks4['socks4'], mimvp_url) # socks4
test_socks5(proxy_socks5['socks5'], mimvp_url) # socks5
# proxy auth
test_auth_http(proxy_auth_http, mimvp_url) # http
test_auth_https(proxy_auth_https, mimvp_url2) # https
test_auth_headers(proxy_noauth_http, mimvp_url) # http (不支持访问https网页)
test_auth_http2(proxy_auth_http, mimvp_url) # http
test_auth_http2(proxy_auth_https, mimvp_url2) # https
test_auth_socks5(proxy_auth_socks5['socks5'], mimvp_url) # socks5
Python3 示例
#!/usr/bin/env python3
# -*- coding:utf-8 -*-
#
# Python3 支持 http、https、socks4、socks5
#
# Python3使用socks4/5,需安装PySocks
# pip3 install PySocks
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2016-10-22
import urllib.request # Python3将urllib和urllib2合二为一,并重组了下包结构
import socket
import socks # 安装 pip3 install PySocks
mimvp_url = "http://proxy.mimvp.com/test_proxy2"
mimvp_url2 = "https://proxy.mimvp.com/test_proxy2"
# 全局取消ssl证书验证,防止打开未验证的https网址抛出异常
# urllib.error.URLError:
import ssl
ssl._create_default_https_context = ssl._create_unverified_context
# 通过API提供米扑代理
def spider_proxy():
proxy_url = 'https://proxyapi.mimvp.com/api/fetchsecret?orderid=863196312334111234&http_type=3'
req = urllib.request.Request(proxy_url)
content = urllib.request.urlopen(req, timeout=30).read()
proxy_list = content.decode().split("\n")
for proxy in proxy_list:
print(proxy)
################### proxy no auth(代理无用户名密码验证)###################
proxy_http = {"http" : "http://47.94.230.42:9999"}
proxy_https = {"https" : "http://221.180.204.144:80"}
proxy_socks4 = {'socks4': '61.234.37.3:1080'}
proxy_socks5 = {'socks5': '183.156.71.161:1080'}
# urllib 支持 http, https
def test_http(proxy, mimvp_url):
socket.setdefaulttimeout(60)
handler = urllib.request.ProxyHandler(proxy)
opener = urllib.request.build_opener(handler)
urllib.request.install_opener(opener)
content = urllib.request.urlopen(mimvp_url, timeout=60).read().decode("utf8")
print(len(content), content)
opener.close()
# socks4
def test_socks4(socks4, mimvp_url):
socks4_ip = socks4.split(":")[0]
socks4_port = int(socks4.split(":")[1])
socks.set_default_proxy(socks.SOCKS4, socks4_ip, socks4_port)
socket.socket = socks.socksocket
content = urllib.request.urlopen(mimvp_url, timeout=30).read().decode("utf8")
print(len(content), content)
# socks5
def test_socks5(socks5, mimvp_url):
socks5_ip = socks5.split(":")[0]
socks5_port = int(socks5.split(":")[1])
socks.set_default_proxy(socks.SOCKS5, socks5_ip, socks5_port)
socket.socket = socks.socksocket
content = urllib.request.urlopen(mimvp_url, timeout=30).read().decode("utf8")
print(len(content), content)
################### proxy auth(代理有用户名密码验证)###################
proxy_noauth_http = {"http" : "http://120.24.77.37:1056"}
proxy_noauth_https = {"https" : "http://120.24.77.37:1056"}
proxy_auth_socks5 = {'socks5' : '120.24.177.37:10545'}
proxy_auth_http = {"http" : "http://username:password@120.24.77.37:1056"}
proxy_auth_https = {"https" : "http://username:password@120.24.77.37:1056"}
# urllib 支持 http, https
def test_auth_http(proxy, mimvp_url):
socket.setdefaulttimeout(30)
handler = urllib.request.ProxyHandler(proxy)
opener = urllib.request.build_opener(handler)
urllib.request.install_opener(opener)
content = urllib.request.urlopen(mimvp_url, timeout=60).read().decode("utf8")
print(len(content), content)
opener.close()
# socks5
def test_auth_socks5(socks5, mimvp_url):
PROXY_USERNAME = 'username'
PROXY_PASSWORD = 'password'
socks5_ip = socks5.split(":")[0]
socks5_port = int(socks5.split(":")[1])
rdns = False # 是否在代理服务器上进行dns查询
socks.set_default_proxy(socks.SOCKS5, socks5_ip, socks5_port, rdns, PROXY_USERNAME, PROXY_PASSWORD)
socket.socket = socks.socksocket
content = urllib.request.urlopen(mimvp_url, timeout=30).read().decode("utf8")
print(len(content), content)
if __name__ == "__main__":
spider_proxy() # 通过API获取米扑代理
# proxy no auth
test_http(proxy_http, mimvp_url) # http
test_http(proxy_https, mimvp_url2) # https
test_socks4(proxy_socks4['socks4'], mimvp_url) # socks4 访问http网页
test_socks4(proxy_socks4['socks4'], mimvp_url2) # socks4 访问https网页
test_socks5(proxy_socks5['socks5'], mimvp_url) # socks5 访问http网页
test_socks5(proxy_socks5['socks5'], mimvp_url2) # socks5 访问https网页
# proxy auth
test_auth_http(proxy_auth_http, mimvp_url) # http
test_auth_http(proxy_auth_https, mimvp_url2) # https
test_auth_socks5(proxy_auth_socks5['socks5'], mimvp_url) # socks5 访问http网页
test_auth_socks5(proxy_auth_socks5['socks5'], mimvp_url2) # socks5 访问https网页
Python pyspider 示例
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
# Project: mimvp_proxy_pyspider
#
# Python pyspider 支持 http、https
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2017-07-22
#
# 米扑博客示例详解 :
# https://blog.mimvp.com/article/19010.html
############ 方式1:pyspider crawl_config ############
from pyspider.libs.base_handler import *
class Handler(BaseHandler):
crawl_config = {
'proxy' : 'http://188.226.141.217:8080', # http
'proxy' : 'https://182.253.32.65:3128' # https
}
@every(minutes=24 * 60)
def on_start(self):
self.crawl('http://proxy.mimvp.com/exist', callback=self.index_page)
@config(age=10 * 24 * 60 * 60)
def index_page(self, response):
for each in response.doc('a[href^="http"]').items():
self.crawl(each.attr.href, callback=self.detail_page)
@config(priority=2)
def detail_page(self, response):
return {
"url": response.url,
"title": response.doc('title').text(),
}
############ 方式2:pyspider --phantomjs-proxy 启动 ############
# $ pyspider --help
# Usage: pyspider [OPTIONS] COMMAND [ARGS]...
#
# A powerful spider system in python.
#
# Options:
# -c, --config FILENAME a json file with default values for
# subcommands. {"webui": {"port":5001}}
# --logging-config TEXT logging config file for built-in python
# logging module [default: /Library/Framework
# s/Python.framework/Versions/2.7/lib/python2.
# 7/site-packages/pyspider/logging.conf]
# --debug debug mode
# --queue-maxsize INTEGER maxsize of queue
# --taskdb TEXT database url for taskdb, default: sqlite
# --projectdb TEXT database url for projectdb, default: sqlite
# --resultdb TEXT database url for resultdb, default: sqlite
# --message-queue TEXT connection url to message queue, default:
# builtin multiprocessing.Queue
# --amqp-url TEXT [deprecated] amqp url for rabbitmq. please
# use --message-queue instead.
# --beanstalk TEXT [deprecated] beanstalk config for beanstalk
# queue. please use --message-queue instead.
# --phantomjs-proxy TEXT phantomjs proxy ip:port
# --data-path TEXT data dir path
# --add-sys-path / --not-add-sys-path
# add current working directory to python lib
# search path
# --version Show the version and exit.
# --help Show this message and exit.
pyspider --phantomjs-proxy "188.226.141.217:8080" all
Python requests 示例
#!/usr/bin/env python
# -*- encoding: utf-8 -*-
#
# Python requests 支持 http、https、socks4、socks5
#
# requests不是Python原生库,需要安装:
# pip install requests
# pip install requests[socks] # requests >= 2.10.0
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2016-09-16
import requests
import base64
import socks, socket # 需要引入socks.py文件,请到米扑代理示例下载
# 用于无用户名密码且访问https网址的代理
# import ssl
# ssl._create_default_https_context = ssl._create_unverified_context
mimvp_url = "http://proxy.mimvp.com/test_proxy2" # http
mimvp_url2 = "https://proxy.mimvp.com/test_proxy2" # https
################### proxy no auth (代理无用户名密码验证)###################
# 使用代理 http, https
proxies = {
"http" : "http://91.121.162.173:80",
"https" : "http://190.24.131.250:3128",
}
req = requests.get(mimvp_url, proxies=proxies, timeout=30) # http
print("mimvp text : " + req.text)
req = requests.get(mimvp_url2, proxies=proxies, timeout=30, verify=False) # https
print("mimvp text : " + req.text)
# 使用代理 socks4
proxies = {
'socks4' : '222.83.247.90:1080',
}
socks4_ip = proxies['socks4'].split(":")[0]
socks4_port = int(proxies['socks4'].split(":")[1])
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS4, socks4_ip, socks4_port)
socket.socket = socks.socksocket
req = requests.get(mimvp_url, timeout=30) # http
print("mimvp text : " + req.text)
req = requests.get(mimvp_url2, timeout=30, verify=False) # https
print("mimvp text : " + req.text)
# 方式1:使用代理 socks5
proxies = {
'socks5' : '121.40.195.151:9999',
}
socks5_ip = proxies['socks5'].split(":")[0]
socks5_port = int(proxies['socks5'].split(":")[1])
socks.setdefaultproxy(socks.PROXY_TYPE_SOCKS5, socks5_ip, socks5_port)
socket.socket = socks.socksocket
req = requests.get(mimvp_url, timeout=30) # http
print("mimvp text : " + req.text)
req = requests.get(mimvp_url2, timeout=30, verify=False) # https
print("mimvp text : " + req.text)
# 方式2:使用代理 socks5 (需要安装 pip install requests[socks] 且要求 requests >= 2.10.0)
proxies = {
"http" : "socks5://59.78.45.101:1080",
"https" : "socks5://59.78.45.101:1080",
}
req = requests.get(mimvp_url, proxies=proxies, timeout=30) # http
print("mimvp text : " + req.text)
req = requests.get(mimvp_url2, proxies=proxies, timeout=30, verify=False) # https
print("mimvp text : " + req.text)
################### proxy auth (代理有用户名密码验证)###################
# 方式1:使用代理 http, https
proxies = {
"http" : "http://username:password@139.199.22.226:4765",
"https" : "http://username:password@139.199.22.226:4765",
}
req = requests.get(mimvp_url, proxies=proxies, timeout=30) # http
print("mimvp text : " + req.text)
req = requests.get(mimvp_url2, proxies=proxies, timeout=30) # https
print("mimvp text : " + req.text)
# 方式2:使用代理 http (注意:headers认证,只支持访问http网页,不支持访问https网页)
PROXY_USERNAME = 'username'
PROXY_PASSWORD = 'password'
proxies = {
"http" : "http://111.230.100.182:3712",
"https" : "http://111.230.100.182:3712",
}
headers = {
"Host" : "proxy.mimvp.com",
"User-Agent" : "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36",
"Accept-Encoding" : "Gzip",
"Proxy-Authorization" : "Basic %s" % base64.b64encode(b'%s:%s' % (PROXY_USERNAME, PROXY_PASSWORD)),
}
req = requests.get(mimvp_url, proxies=proxies, headers=headers, timeout=30) # http (经米扑验证,此方式不支持https)
print("mimvp text : " + req.text)
# 使用代理 socks5 (需要安装 pip install requests[socks] 且要求 requests >= 2.10.0)
proxies = {
"http" : "socks5://username:password@123.53.118.23:1872",
"https" : "socks5://username:password@123.53.118.23:1872",
}
req = requests.get(mimvp_url, proxies=proxies, timeout=30) # http
print("mimvp text : " + req.text)
req = requests.get(mimvp_url2, proxies=proxies, timeout=30, verify=False) # https
print("mimvp text : " + req.text)
################### requests 简单示例 ###################
# 实时获取米扑代理API接口
mimvp_proxy_url = 'https://proxyapi.mimvp.com/api/fetchopen?orderid=863196322034111234'
req = requests.get(url=mimvp_proxy_url, timeout=30, verify=False)
req_text = req.text
proxy_list = req_text.split("\n")
for proxy in proxy_list:
print proxy
# 爬取米扑科技首页
req = requests.get(url = 'https://mimvp.com', verify=False)
print("status_code : " + str(req.status_code))
print("mimvp text : " + req.text)
# 爬取米扑代理(含请求参数)
req = requests.get(url='https://proxy.mimvp.com/freeopen', params={'proxy':'out_tp','sort':'p_ping'}, verify=False)
print("status_code : " + str(req.status_code))
print("mimvp text : " + req.text)
Python scrapy 示例
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Python scrapy 支持 http、https
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2016.10.1
# Python Scrapy 设置代理有两种方式,使用时两种方式选择一种即可
# 方式1: 直接在代码里设置,如 MimvpSpider ——> start_requests
# 方式2: 通过 middlewares.py + settings.py 配置文件设置,步骤:
# 2.1 middlewares.py 添加代理类 ProxyMiddleware,并添加代理
# 2.2 settings.py 开启 DOWNLOADER_MIDDLEWARES,并且添加 'mimvp_proxy_python_scrapy.middlewares.ProxyMiddleware': 100
#
# 米扑博客想写介绍了 Python scrapy 设置代理的两种方式(附源码)
# https://blog.mimvp.com/article/19083.html
import scrapy
################################ 方式1:直接代码设置代理 ################################
class MimvpSpider(scrapy.spiders.Spider):
name = "mimvp"
allowed_domains = ["mimvp.com"]
start_urls = [
"http://proxy.mimvp.com/test_proxy2",
"https://proxy.mimvp.com/test_proxy2",
]
## 代理设置方式1:直接在代理里设置
def start_requests(self):
urls = [
"http://proxy.mimvp.com/test_proxy2",
"https://proxy.mimvp.com/test_proxy2",
]
for url in urls:
meta_proxy = ""
# proxy no auth(代理无用户名密码验证,或白名单ip授权)
# 白名单ip授权,请见米扑代理会员中心:https://proxy.mimvp.com/usercenter/?p=whiteip
if url.startswith("http://"):
meta_proxy = "http://180.96.27.12:88" # http代理
elif url.startswith("https://"):
meta_proxy = "http://109.108.87.136:53281" # https代理
# # proxy auth(代理有用户名密码验证)
# # 用户名密码授权,请见米扑代理会员中心:https://proxy.mimvp.com/usercenter/?p=whiteip
# if url.startswith("http://"):
# meta_proxy = "http://username:password@180.96.27.12:88" # http代理
# elif url.startswith("https://"):
# meta_proxy = "http://username:password@109.108.87.136:53281" # https代理
yield scrapy.Request(url=url, callback=self.parse, meta={'proxy': meta_proxy})
def parse(self, response):
mimvp_url = response.url # 爬取时请求的url
body = response.body # 返回网页内容
print("mimvp_url : " + str(mimvp_url))
print("body : " + str(body))
################################ 方式2:配置文件设置代理 ################################
# 配置文件设置代理,需要同时在 middlewares.py + settings.py 两个文件里配置,详细步骤:
## a) middlewares.py 文件里,添加代码:
## 代理设置方式2: 通过 middlewares.py + settings.py 配置文件设置
## mimvp custom by yourself
class ProxyMiddleware(object):
def process_request(self,request,spider):
# proxy no auth(代理无用户名密码验证,或白名单ip授权)
# 白名单ip授权,请见米扑代理会员中心:https://proxy.mimvp.com/usercenter/?p=whiteip
if request.url.startswith("http://"):
request.meta['proxy']="http://180.96.27.12:88" # http代理
elif request.url.startswith("https://"):
request.meta['proxy']="http://109.108.87.136:53281" # https代理
# # proxy auth(代理有用户名密码验证)
# # 用户名密码授权,请见米扑代理会员中心:https://proxy.mimvp.com/usercenter/?p=whiteip
# if request.url.startswith("http://"):
# request.meta['proxy']="http://username:password@180.96.27.12:88" # http代理
# elif request.url.startswith("https://"):
# request.meta['proxy']="http://username:password@109.108.87.136:53281" # https代理
# # proxy authentication (经验证,失败,不推荐)
# proxy_user_pass = "username:password"
# encoded_user_pass = base64.encodestring(proxy_user_pass)
# request.headers['Proxy-Authorization'] = 'Basic ' + encoded_user_pass
## b)settings.py 文件里,开启设置 DOWNLOADER_MIDDLEWARES
DOWNLOADER_MIDDLEWARES = {
'scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware': 110,
'mimvp_proxy_python_scrapy.middlewares.ProxyMiddleware': 100,
## c) 代码里编写爬虫
## vim mimvp_spider.py
import scrapy
class MimvpSpider(scrapy.spiders.Spider):
name = "mimvp"
allowed_domains = ["mimvp.com"]
start_urls = [
"http://proxy.mimvp.com/exist",
"https://proxy.mimvp.com/exist",
]
def parse(self, response):
mimvp_url = response.url # 爬取时请求的url
body = response.body # 返回网页内容
print("mimvp_url : " + str(mimvp_url))
print("body : " + str(body))
## 运行命令:
## scrapy crawl mimvp --nolog
Ruby 示例
#!/usr/bin/env ruby
#
# Ruby 支持 http
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2017-03-21
require 'rubygems'
require 'net/http'
require 'open-uri'
require 'timeout'
mimvp_url = "http://proxy.mimvp.com/exist"
mimvp_url2 = "https://proxy.mimvp.com/exist"
mimvp_url3 = "https://apps.bdimg.com/libs/jquery-i18n/1.1.1/jquery.i18n.min.js"
$proxy = '183.222.102.95:8080'
$proxy_addr = $proxy.split(':')[0].strip
$proxy_port = $proxy.split(':')[1].strip
puts $proxy_addr
puts $proxy_port
begin
Timeout.timeout(30) {
# mimvp_url = http://proxy.mimvp.com/exist
# uri.host = proxy.mimvp.com
# uri.port = 80
# uri.path = /exist
uri = URI.parse(mimvp_url)
result = Net::HTTP.new(uri.host, nil, $proxy_addr, $proxy_port).start { |http|
http.get(uri.path)
}
puts result.body
# mimvp_url = http://proxy.mimvp.com/exist
# uri.host = proxy.mimvp.com
# uri.port = 80
# uri.path = /exist
# req = #
uri = URI.parse(mimvp_url)
req = Net::HTTP::Get.new(uri.path)
result = Net::HTTP::Proxy($proxy_addr, $proxy_port).start(uri.host, uri.port) {|http|
http.request(req)
}
puts result.body
# # proxy auth (NO TEST)
# $proxy_addr = 'proxy addr'
# $proxy_port = 'proxy_port'
# $proxy_user = 'username' # 代理用户名
# $proxy_pass = 'password' # 代理密码
#
# $website_username = 'website_username' # 目标网站登录用户名
# $website_password = 'website_password' # 目标网站登录密码
#
# uri = URI.parse(mimvp_url)
# req = Net::HTTP::Get.new(uri.path)
# req.basic_auth($website_username, $website_password)
#
# result = Net::HTTP::Proxy($proxy_addr, $proxy_port, $proxy_user, $proxy_pass).start(uri.host, uri.port) {|http|
# http.request(req)
# }
# puts result.body
}
rescue => e
puts e.inspect
exit
end
## 运行命令: ruby mimvp-proxy-ruby.rb
Selenium Java 示例(FirefoxDriver + HtmlUnitDriver)
/**
* Selenum FirefoxDriver 支持 http、socks5
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2015-11-09
*/
package com.mimvp;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.firefox.FirefoxProfile;
public class MimvpProxy_FirefoxDriver {
final static String proxyUri = "103.35.171.77:8080"; // 代理服务器(HTTP, HTTPS)
final static String proxySocks = "50.63.167.102:18628"; // 代理服务器(Socks5)
final static String mimvpUrl = "http://proxy.mimvp.com/exist"; // 爬取网址
// final static String firefoxBin = "D:/Program Files/Mozilla Firefox/firefox.exe"; // Windows
// final static String firefoxBin = "/usr/bin/firefox-bin"; // Linux
final static String firefoxBin = "/Applications/Firefox.app/Contents/MacOS/firefox-bin"; // Mac
public static void main(String[] args) {
getNoProxy();
getHttpProxy();
getSocksProxy();
}
// 不用代理爬取网页
public static void getNoProxy() {
// System.setProperty("webdriver.firefox.bin", firefoxBin); // 不设置也可以
FirefoxProfile profile = new FirefoxProfile();
// 启动Firefox,抓取网页后,再关闭Firefox
FirefoxDriver driver = new FirefoxDriver(profile);
driver.get(mimvpUrl);
String title = driver.getTitle();
System.out.println(title); // 检测收录 - 米扑代理
driver.quit();
driver.close();
}
// HTTP和HTTPS代理爬取网页
public static void getHttpProxy() {
FirefoxProfile profile = new FirefoxProfile();
// 设置代理
profile.setPreference("network.proxy.type", 1); // 0 - 不用代理; 1 - 使用代理
profile.setPreference("network.proxy.http", proxyUri.split(":")[0]);
profile.setPreference("network.proxy.http_port", Integer.parseInt(proxyUri.split(":")[1]));
profile.setPreference("network.proxy.ssl", proxyUri.split(":")[0]);
profile.setPreference("network.proxy.ssl_port", Integer.parseInt(proxyUri.split(":")[1]));
profile.setPreference("network.proxy.no_proxies_on", "localhost");
// // 代理设置用户名和密码,无密码的代理无需配置
// profile.setPreference("username", "mimvp-user");
// profile.setPreference("password", "mimvp-pwd");
FirefoxDriver driver = new FirefoxDriver(profile);
driver.get(mimvpUrl);
String title = driver.getTitle();
System.out.println(title); // 检测收录 - 米扑代理
driver.close();
}
// Socks5代理爬取网页
public static void getSocksProxy() {
FirefoxProfile profile = new FirefoxProfile();
// 设置代理
profile.setPreference("network.proxy.type", 1); // 0 - 不用代理; 1 - 使用代理
profile.setPreference("network.proxy.socks", proxySocks.split(":")[0]);
profile.setPreference("network.proxy.socks_port", Integer.parseInt(proxySocks.split(":")[1]));
profile.setPreference("network.proxy.no_proxies_on", "localhost");
FirefoxDriver driver = new FirefoxDriver(profile);
driver.get(mimvpUrl);
String title = driver.getTitle();
System.out.println(title); // 检测收录 - 米扑代理
driver.close();
}
}
-----------------------------------------
/**
* Selenum HtmlUnitDriver 支持 http、socks5
*
* 米扑代理示例:
* https://proxy.mimvp.com/demo
*
* 米扑代理购买:
* https://proxy.mimvp.com
*
* mimvp.com
* 2015-11-09
*/
package com.mimvp;
import org.openqa.selenium.By;
import org.openqa.selenium.Keys;
import org.openqa.selenium.Platform;
import org.openqa.selenium.Proxy;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.interactions.Actions;
import org.openqa.selenium.remote.CapabilityType;
import org.openqa.selenium.remote.DesiredCapabilities;
import com.gargoylesoftware.htmlunit.DefaultCredentialsProvider;
import com.gargoylesoftware.htmlunit.WebClient;
public class MimvpProxy_HtmlUnitDriver {
final static String proxyUri = "183.222.102.98:8080"; // 代理服务器(HTTP)
final static String proxySocks = "103.14.27.174:1080"; // 代理服务器(Socks5)
final static String mimvpUrl = "http://proxy.mimvp.com/exist"; // 爬取网址
public static void main(String[] args) {
getNoProxy();
getHttpProxy();
getSocksProxy();
getAuthProxy();
getBaiduSearch("米扑科技");
}
// 不用代理爬取网页
public static void getNoProxy() {
HtmlUnitDriver driver = new HtmlUnitDriver(true); // enable javascript
driver.setJavascriptEnabled(true);
driver.get(mimvpUrl);
String title = driver.getTitle();
System.out.println(title); // 检测收录 - 米扑代理
}
// HTTP代理爬取网页
public static void getHttpProxy() {
HtmlUnitDriver driver = new HtmlUnitDriver(true); // enable javascript
// 方法1
driver.setProxy(proxyUri.split(":")[0], Integer.parseInt(proxyUri.split(":")[1])); // proxyUri = "183.222.102.98:8080"
// 方式2
driver.setHTTPProxy(proxyUri.split(":")[0], Integer.parseInt(proxyUri.split(":")[1]), null); // proxyUri = "183.222.102.98:8080"
// 方法3
Proxy proxy = new Proxy();
proxy.setHttpProxy(proxyUri); // 设置代理服务器地址, proxyUri = "183.222.102.98:8080"
driver.setProxySettings(proxy);
driver.get(mimvpUrl);
String html = driver.getPageSource();
System.out.println(html);
String title = driver.getTitle();
System.out.println(title); // 检测收录 - 米扑代理
}
// Socks5代理爬取网页
public static void getSocksProxy() {
HtmlUnitDriver driver = new HtmlUnitDriver(true); // enable javascript
// 方式1
driver.setSocksProxy(proxySocks.split(":")[0], Integer.parseInt(proxySocks.split(":")[1])); // proxySocks = "183.239.240.138:1080"
// 方式2
driver.setSocksProxy(proxySocks.split(":")[0], Integer.parseInt(proxySocks.split(":")[1]), null); // proxySocks = "183.239.240.138:1080"
driver.get(mimvpUrl);
String html = driver.getPageSource();
System.out.println(html);
String title = driver.getTitle();
System.out.println(title); // 检测收录 - 米扑代理
}
// 代理需要用户名和密码
public static void getAuthProxy() {
HtmlUnitDriver driver = null;
final String proxyUser = "mimvp-user";
final String proxyPass = "mimvp-pwd";
Proxy proxy = new Proxy();
proxy.setHttpProxy(proxyUri);
// 设置代理的用户名和密码
DesiredCapabilities capabilities = DesiredCapabilities.htmlUnit();
capabilities.setCapability(CapabilityType.PROXY, proxy);
capabilities.setJavascriptEnabled(true);
capabilities.setPlatform(Platform.WIN8_1);
driver = new HtmlUnitDriver(capabilities) {
@Override
protected WebClient modifyWebClient(WebClient client) {
DefaultCredentialsProvider creds = new DefaultCredentialsProvider();
creds.addCredentials(proxyUser, proxyPass);
client.setCredentialsProvider(creds);
return client;
}
};
driver.setJavascriptEnabled(true); // enable javascript
driver.get(mimvpUrl);
String title = driver.getTitle();
System.out.println(title); // 检测收录 - 米扑代理
}
// 进行百度搜索
public static void getBaiduSearch(String keyword) {
final String url = "http://www.baidu.com";
WebDriver driver = new HtmlUnitDriver(false);
driver.get(url);
driver.findElement(By.id("kw")).sendKeys(keyword);
Actions action = new Actions(driver);
action.sendKeys(Keys.ENTER).perform();
String html = driver.getPageSource();
System.out.println(html);
}
}
Selenium Python 示例(PhantomJS)
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Selenium PhantomJS 支持 http、socks5
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2017-01-08
from selenium import webdriver
proxy_http = "http://138.68.165.154:3128"
proxy_socks5 = "socks5://209.151.135.126:11747"
mimvp_url = "http://proxy.mimvp.com/exist"
mimvp_url2 = "https://proxy.mimvp.com/exist"
mimvp_url3 = "https://apps.bdimg.com/libs/jquery-i18n/1.1.1/jquery.i18n.min.js"
# urllib2 支持 http, https
def test_http(proxy, mimvp_url):
proxy_type = proxy.split("://")[0]
proxy_ip = proxy.split("://")[1].split(":")[0]
proxy_port = int(proxy.split("://")[1].split(":")[1])
# 使用PhantomJS命令,因PhantomJS仅支持http、socks5,所以Selenium Webdriver也只支持http、socks5
service_args = [
"--proxy-type=%(http)s" % {"http" : proxy_type},
"--proxy=%(host)s:%(port)s" % {
"host" : proxy_ip,
"port" : proxy_port,
},
# "--proxy-auth=%(user)s:%(pass)s" % {
# "user" : 'mimvp-user',
# "pass" : 'mimvp-pass',
# },
]
phantomjs_path = r"/opt/phantomjs-2.1.1/bin/phantomjs"
driver = webdriver.PhantomJS(executable_path=phantomjs_path, service_args=service_args)
driver.get(mimvp_url)
print driver.title
print driver.page_source.encode("utf-8")
driver.quit()
if __name__ == "__main__":
# http, socks5
test_http(proxy_http, mimvp_url)
test_http(proxy_socks5, mimvp_url)
#################### Python + Selenium + Firefox ####################
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Selenium + Firefox 支持 http、https (无密和加密两种方式),不支持socks5
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2017-06-08
# Python + Selenium + Firefox 设置密码时,需要使用到两个插件:
# 插件1: modify_headers-0.7.1.1-fx.xpi
# 下载地址:https://github.com/mimvp/mimvp-proxy-demo
#
# 方式2: close_proxy_authentication-1.1.xpi
# 下载地址:https://github.com/mimvp/mimvp-proxy-demo
#
# Python + Selenium + Firefox 代理示例,详见米扑博客:
# https://blog.mimvp.com/article/25055.html
#
# 本示例由米扑代理原创,测试代理来自于米扑代理
# 密码授权和白名单ip设置,请见米扑代理 - 会员中心:https://proxy.mimvp.com/usercenter/?p=whiteip
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
from selenium.webdriver.common.proxy import *
from pyvirtualdisplay import Display
# from xvfbwrapper import Xvfb
import bs4, os
from base64 import b64encode
import sys
reload(sys)
sys.setdefaultencoding('utf8')
## webdriver + firefox (不使用代理,爬取网页)
def spider_url_firefox(url):
browser = None
display = None
try:
display = Display(visible=0, size=(800, 600))
display.start()
browser = webdriver.Firefox() # 打开 FireFox 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
## webdriver + firefox + proxy + whiteip (无密码,或白名单ip授权)
## 米扑代理:https://proxy.mimvp.com
def spider_url_firefox_by_whiteip(url):
browser = None
display = None
## 白名单ip,请见米扑代理会员中心: https://proxy.mimvp.com/usercenter/?p=whiteip
mimvp_proxy = {
'ip' : '140.143.62.84', # ip
'port_https' : 19480, # http, https
'port_socks' : 19481, # socks5
'username' : 'mimvp-user',
'password' : 'mimvp-pass'
}
try:
display = Display(visible=0, size=(800, 600))
display.start()
profile = webdriver.FirefoxProfile()
# add proxy
profile.set_preference('network.proxy.type', 1) # ProxyType.MANUAL = 1
if url.startswith("http://"):
profile.set_preference('network.proxy.http', mimvp_proxy['ip'])
profile.set_preference('network.proxy.http_port', mimvp_proxy['port_https']) # 访问http网站
elif url.startswith("https://"):
profile.set_preference('network.proxy.ssl', mimvp_proxy['ip'])
profile.set_preference('network.proxy.ssl_port', mimvp_proxy['port_https']) # 访问https网站
else:
profile.set_preference('network.proxy.socks', mimvp_proxy['ip'])
profile.set_preference('network.proxy.socks_port', mimvp_proxy['port_socks'])
profile.set_preference('network.proxy.ftp', mimvp_proxy['ip'])
profile.set_preference('network.proxy.ftp_port', mimvp_proxy['port_https'])
profile.set_preference('network.proxy.no_proxies_on', 'localhost,127.0.0.1')
## 不存在此用法,不能这么设置用户名密码 (舍弃)
# profile.set_preference("network.proxy.username", 'mimvp-user')
# profile.set_preference("network.proxy.password", 'mimvp-pass')
profile.update_preferences()
browser = webdriver.Firefox(profile) # 打开 FireFox 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
## webdriver + firefox + proxy + https (https密码授权)
## 米扑代理:https://proxy.mimvp.com
def spider_url_firefox_by_https(url):
browser = None
display = None
## 授权密码,请见米扑代理会员中心: https://proxy.mimvp.com/usercenter/?p=whiteip
mimvp_proxy = {
'ip' : '140.143.62.84', # ip
'port_https' : 19480, # http, https
'port_socks' : 19481, # socks5
'username' : 'mimvp-user',
'password' : 'mimvp-pass'
}
try:
display = Display(visible=0, size=(800, 600))
display.start()
profile = webdriver.FirefoxProfile()
# add new header
profile.add_extension("modify_headers-0.7.1.1-fx.xpi")
profile.set_preference("extensions.modify_headers.currentVersion", "0.7.1.1-fx")
profile.set_preference("modifyheaders.config.active", True)
profile.set_preference("modifyheaders.headers.count", 1)
profile.set_preference("modifyheaders.headers.action0", "Add")
profile.set_preference("modifyheaders.headers.name0", "Proxy-Switch-Ip")
profile.set_preference("modifyheaders.headers.value0", "yes")
profile.set_preference("modifyheaders.headers.enabled0", True)
# add proxy
profile.set_preference('network.proxy.type', 1) # ProxyType.MANUAL = 1
if url.startswith("http://"):
profile.set_preference('network.proxy.http', mimvp_proxy['ip'])
profile.set_preference('network.proxy.http_port', mimvp_proxy['port_https']) # 访问http网站
elif url.startswith("https://"):
profile.set_preference('network.proxy.ssl', mimvp_proxy['ip'])
profile.set_preference('network.proxy.ssl_port', mimvp_proxy['port_https']) # 访问https网站
# Proxy auto login (自动填写密码,进行代理授权)
profile.add_extension('close_proxy_authentication-1.1.xpi')
credentials = '{username}:{password}'.format(username=mimvp_proxy['username'], password=mimvp_proxy['password']) # auth
credentials = b64encode(credentials.encode('ascii')).decode('utf-8')
profile.set_preference('extensions.closeproxyauth.authtoken', credentials)
profile.update_preferences()
browser = webdriver.Firefox(profile) # 打开 FireFox 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
## webdriver + firefox + proxy + socks (socks密码授权)
## 米扑代理:https://proxy.mimvp.com
def spider_url_firefox_by_socks(url):
browser = None
display = None
## 授权密码,请见米扑代理会员中心: https://proxy.mimvp.com/usercenter/?p=whiteip
mimvp_proxy = {
'ip' : '140.143.62.84', # ip
'port_https' : 19480, # http, https
'port_socks' : 19481, # socks5
'username' : 'mimvp-user',
'password' : 'mimvp-pass'
}
proxy_config = Proxy({
'proxyType' : ProxyType.MANUAL, # 1
'httpProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_https']),
'sslProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_https']),
'socksProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_socks']),
'ftpProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_https']),
'noProxy' : 'localhost,127.0.0.1',
'socksUsername' : mimvp_proxy['username'],
'socksPassword' : mimvp_proxy['password'],
})
try:
display = Display(visible=0, size=(800, 600))
display.start()
profile = webdriver.FirefoxProfile()
# add new header
profile.add_extension("modify_headers-0.7.1.1-fx.xpi")
profile.set_preference("extensions.modify_headers.currentVersion", "0.7.1.1-fx")
profile.set_preference("modifyheaders.config.active", True)
profile.set_preference("modifyheaders.headers.count", 1)
profile.set_preference("modifyheaders.headers.action0", "Add")
profile.set_preference("modifyheaders.headers.name0", "Proxy-Switch-Ip")
profile.set_preference("modifyheaders.headers.value0", "yes")
profile.set_preference("modifyheaders.headers.enabled0", True)
# auto save auth
profile.set_preference("signon.autologin.proxy", 'true')
profile.set_preference("network.websocket.enabled", 'false')
profile.set_preference('network.proxy.share_proxy_settings', 'false')
profile.set_preference('network.automatic-ntlm-auth.allow-proxies', 'false')
profile.set_preference('network.auth.use-sspi', 'false')
profile.update_preferences()
browser = webdriver.Firefox(proxy=proxy_config, firefox_profile=profile) # 打开 FireFox 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
if __name__ == '__main__':
url = 'https://ip.cn'
url = 'https://mimvp.com'
url = 'https://proxy.mimvp.com/ip'
# 不使用代理,爬取网页,成功
spider_url_firefox(url)
# 代理无密码,或设置白名单ip,成功
spider_url_firefox_by_whiteip(url)
# http, https 密码授权,成功
spider_url_firefox_by_https(url)
# socks5 密码授权,失败 (仍然是本机ip请求的,不是代理ip请求)
spider_url_firefox_by_socks(url)
#################### Python + Selenium + Chrome ####################
#!/usr/bin/env python
# -*- coding:utf-8 -*-
#
# Selenium + Chrome 支持 http、https (无密和加密两种方式),不支持socks5
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2017-01-08
# Python + Selenium + Chrome 代理示例,详见米扑博客:
# https://blog.mimvp.com/article/25076.html
#
# 本示例由米扑代理原创,测试代理来自于米扑代理
# 密码授权和白名单ip设置,请见米扑代理 - 会员中心:https://proxy.mimvp.com/usercenter/?p=whiteip
from selenium import webdriver
from selenium.webdriver.common.proxy import *
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from pyvirtualdisplay import Display
# from xvfbwrapper import Xvfb
import bs4, os, re, time, zipfile
from base64 import b64encode
import sys
from posix import unlink
reload(sys)
sys.setdefaultencoding('utf8')
## webdriver + chrome (不使用代理,爬取网页)
def spider_url_chrome(url):
browser = None
display = None
try:
display = Display(visible=0, size=(800, 600))
display.start()
chromedriver = '/usr/local/bin/chromedriver'
browser = webdriver.Chrome(executable_path=chromedriver) # 打开 Chrome 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
## webdriver + chrome + proxy + whiteip (无密码,或白名单ip授权)
## 米扑代理:https://proxy.mimvp.com
def spider_url_chrome_by_whiteip(url):
browser = None
display = None
## 白名单ip,请见米扑代理会员中心: https://proxy.mimvp.com/usercenter/?p=whiteip
mimvp_proxy = {
'ip' : '140.143.62.84', # ip
'port_https' : 62288, # http, https
'port_socks' : 62287, # socks5
'username' : 'mimvp-user',
'password' : 'mimvp-pass'
}
try:
display = Display(visible=0, size=(800, 600))
display.start()
chrome_options = Options() # ok
chrome_options = webdriver.ChromeOptions() # ok
proxy_https_argument = '--proxy-server=http://{ip}:{port}'.format(ip=mimvp_proxy['ip'], port=mimvp_proxy['port_https']) # http, https (无密码,或白名单ip授权,成功)
chrome_options.add_argument(proxy_https_argument)
# proxy_socks_argument = '--proxy-server=socks5://{ip}:{port}'.format(ip=mimvp_proxy['ip'], port=mimvp_proxy['port_socks']) # socks5 (无密码,或白名单ip授权,失败)
# chrome_options.add_argument(proxy_socks_argument)
chromedriver = '/usr/local/bin/chromedriver'
browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_options) # 打开 Chrome 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
## webdriver + chrome + proxy + https (https密码授权)
## 米扑代理:https://proxy.mimvp.com
def spider_url_chrome_by_https(url):
browser = None
display = None
try:
display = Display(visible=0, size=(800, 600))
display.start()
chrome_options = Options() # ok
chrome_options = webdriver.ChromeOptions() # ok
chrome_options.add_extension("proxy2.zip") ## 手动打zip包,包含 background.js 和 manifest.json 两个文件
chromedriver = '/usr/local/bin/chromedriver'
browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_options) # 打开 Chrome 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
## 自动打zip包,包含 background.js 和 manifest.json 两个文件
def get_chrome_proxy_extension(proxy):
"""获取一个Chrome代理扩展,里面配置有指定的代理(带用户名密码认证)
proxy - 指定的代理,格式: username:password@ip:port
"""
# Chrome代理插件的参考模板 https://github.com/RobinDev/Selenium-Chrome-HTTP-Private-Proxy
CHROME_PROXY_HELPER_DIR = 'Chrome-proxy-helper' # 自定义目录名,放在代理项目的当前同一级目录
# 存储自定义Chrome代理扩展文件的目录,一般为当前同一级目录
# 生成的zip路径为:chrome-proxy-extensions/mimvp-user_mimvp-pass@140.143.62.84_19480.zip
CUSTOM_CHROME_PROXY_EXTENSIONS_DIR = 'chrome-proxy-extensions'
m = re.compile('([^:]+):([^\@]+)\@([\d\.]+):(\d+)').search(proxy)
if m:
# 提取代理的各项参数
username = m.groups()[0]
password = m.groups()[1]
ip = m.groups()[2]
port = m.groups()[3]
# 创建一个定制Chrome代理扩展(zip文件)
if not os.path.exists(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR):
os.mkdir(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR)
extension_file_path = os.path.join(CUSTOM_CHROME_PROXY_EXTENSIONS_DIR, '{}.zip'.format(proxy.replace(':', '_')))
# 扩展文件不存在,则创建配置文件,并写入zip文件
if not os.path.exists(extension_file_path):
zf = zipfile.ZipFile(extension_file_path, mode='w')
zf.write(os.path.join(CHROME_PROXY_HELPER_DIR, 'manifest.json'), 'manifest.json')
# 替换模板中的代理参数
background_content = open(os.path.join(CHROME_PROXY_HELPER_DIR, 'background.js')).read()
background_content = background_content.replace('mimvp_proxy_host', ip)
background_content = background_content.replace('mimvp_proxy_port', port)
background_content = background_content.replace('mimvp_username', username)
background_content = background_content.replace('mimvp_password', password)
zf.writestr('background.js', background_content)
zf.close()
return extension_file_path
else:
raise Exception('Invalid proxy format. Should be username:password@ip:port')
## webdriver + chrome + proxy + https (https密码授权,自动打包zip)
## 米扑代理:https://proxy.mimvp.com
def spider_url_chrome_by_https2(url):
browser = None
display = None
try:
display = Display(visible=0, size=(800, 600))
display.start()
proxy = 'mimvp-user:mimvp-pass@118.24.232.74:53681'
proxy_zip = get_chrome_proxy_extension(proxy) # 打包代理zip文件
chrome_options = Options() # ok
chrome_options = webdriver.ChromeOptions() # ok
chrome_options.add_extension(proxy_zip)
chromedriver = '/usr/local/bin/chromedriver'
browser = webdriver.Chrome(executable_path=chromedriver, chrome_options=chrome_options) # 打开 Chrome 浏览器
browser.get(url)
content = browser.page_source
print("content: " + str(content))
unlink(proxy_zip) # 使用代理后,删除代理zip文件
finally:
if browser: browser.quit()
if display: display.stop()
## webdriver + chrome + proxy + socks (socks密码授权)
## 米扑代理:https://proxy.mimvp.com
def spider_url_chrome_by_socks(url):
browser = None
display = None
## 白名单ip,请见米扑代理会员中心: https://proxy.mimvp.com/usercenter/?p=whiteip
mimvp_proxy = {
'ip' : '140.143.62.84', # ip
'port_https' : 62288, # http, https
'port_socks' : 62289, # socks5
'username' : 'mimvp-user',
'password' : 'mimvp-pass'
}
try:
display = Display(visible=0, size=(800, 600))
display.start()
capabilities = dict(DesiredCapabilities.CHROME)
capabilities['proxy'] = {
'proxyType' : 'MANUAL',
# 'httpProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_https']),
# 'sslProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_https']),
'socksProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_socks']),
'ftpProxy' : mimvp_proxy['ip'] + ":" + str(mimvp_proxy['port_https']),
'noProxy' : 'localhost,127.0.0.1',
'class' : "org.openqa.selenium.Proxy",
'autodetect' : False
}
capabilities['proxy']['socksUsername'] = mimvp_proxy['username']
capabilities['proxy']['socksPassword'] = mimvp_proxy['password']
chromedriver = '/usr/local/bin/chromedriver'
browser = webdriver.Chrome(chromedriver, desired_capabilities=capabilities)
browser.get(url)
content = browser.page_source
print("content: " + str(content))
finally:
if browser: browser.quit()
if display: display.stop()
if __name__ == '__main__':
url = 'https://mimvp.com/'
url = 'https://proxy.mimvp.com/ip'
# 不使用代理,爬取网页
spider_url_chrome(url)
# 代理无密码,或设置白名单ip,成功
spider_url_chrome_by_whiteip(url)
# http, https 密码授权,成功
spider_url_chrome_by_https(url)
spider_url_chrome_by_https2(url)
# socks5 密码授权,失败 (仍然是本机ip请求的,不是代理ip请求)
spider_url_chrome_by_socks(url)
############## chrome 需要用到的zip包的两个文件 ##############
###### background.js ######
vim Chrome-proxy-helper/background.js
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: "mimvp_proxy_host",
port: parseInt(mimvp_proxy_port)
},
bypassList: ["mimvp.com"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "mimvp_username",
password: "mimvp_password"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: [""]},
['blocking']
);
###### manifest.json ######
vim Chrome-proxy-helper/manifest.json
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"",
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
Shell curl wget 示例
#!/bin/bash
#
# curl 支持 http、https、socks4、socks5
# wget 支持 http、https
#
# 米扑代理示例:
# https://proxy.mimvp.com/demo
#
# 米扑代理购买:
# https://proxy.mimvp.com
#
# mimvp.com
# 2015-11-09
#【米扑代理】:本示例,在CentOS、Ubuntu、MacOS等服务器上,均测试通过
#
# http代理格式 http_proxy=http://IP:Port
# https代理格式 https_proxy=http://IP:Port
################### proxy no auth(代理无用户名密码验证)###################
# curl和wget,爬取http网页
{'http': 'http://120.77.176.179:8888'}
curl -m 30 --retry 3 -x http://120.77.176.179:8888 http://proxy.mimvp.com/test_proxy2 # http_proxy
wget -T 30 --tries 3 -e "http_proxy=http://120.77.176.179:8888" http://proxy.mimvp.com/test_proxy2 # http_proxy
# curl和wget,爬取https网页(注意:添加参数,不经过SSL安全验证)
{'https': 'http://46.105.214.133:3128'}
curl -m 30 --retry 3 -x http://46.105.214.133:3128 -k https://proxy.mimvp.com/test_proxy2 # https_proxy
wget -T 30 --tries 3 -e "https_proxy=http://46.105.214.133:3128" --no-check-certificate https://proxy.mimvp.com/test_proxy2 # https_proxy
# curl 支持socks
# 其中,socks4和socks5两种协议的代理,都可以同时爬取http和https网页
{'socks4': '101.255.17.145:1080'}
curl -m 30 --retry 3 --socks4 101.255.17.145:1080 http://proxy.mimvp.com/test_proxy2
curl -m 30 --retry 3 --socks4 101.255.17.145:1080 https://proxy.mimvp.com/test_proxy2
{'socks5': '82.164.233.227:45454'}
curl -m 30 --retry 3 --socks5 82.164.233.227:45454 http://proxy.mimvp.com/test_proxy2
curl -m 30 --retry 3 --socks5 82.164.233.227:45454 https://proxy.mimvp.com/test_proxy2
# curl 发送post请求(代理ip不需密码授权,白名单ip授权的可以)
curl -m 30 --retry 3 -x 'http://126.185.148.106:3784' -d "post_data=i_love_mimvp" 'https://proxy.mimvp.com/ip'
curl -m 30 --retry 3 -x 'http://126.185.148.106:3784' -H "Content-Type:application/json" -X POST -d '{"post_data":"i_love_mimvp"}' 'https://proxy.mimvp.com/ip'
# wget 不支持socks
################### proxy auth(代理需要用户名和密码验证) ###################
# curl和wget,爬取http网页
curl -m 30 --retry 3 -x http://username:password@210.159.166.225:5718 http://proxy.mimvp.com/test_proxy2 # http
curl -m 30 --retry 3 -x http://username:password@210.159.166.225:5718 https://proxy.mimvp.com/test_proxy2 # https
curl -m 30 --retry 3 -U username:password -x http://210.159.166.225:5718 http://proxy.mimvp.com/test_proxy2 # http
curl -m 30 --retry 3 -U username:password -x http://210.159.166.225:5718 https://proxy.mimvp.com/test_proxy2 # https
curl -m 30 --retry 3 --proxy-user username:password -x http://210.159.166.225:5718 http://proxy.mimvp.com/test_proxy2 # http
curl -m 30 --retry 3 --proxy-user username:password -x http://210.159.166.225:5718 https://proxy.mimvp.com/test_proxy2 # https
wget -T 30 --tries 3 -e "http_proxy=http://username:password@2.19.16.5:5718" http://proxy.mimvp.com/test_proxy2
wget -T 30 --tries 3 -e "https_proxy=http://username:password@2.19.16.5:5718" https://proxy.mimvp.com/test_proxy2
wget -T 30 --tries 3 --proxy-user=username --proxy-password=password -e "http_proxy=http://2.19.16.5:5718" http://proxy.mimvp.com/test_proxy2
wget -T 30 --tries 3 --proxy-user=username --proxy-password=password -e "https_proxy=http://2.19.16.5:5718" https://proxy.mimvp.com/test_proxy2
# curl 支持socks
curl -m 30 --retry 3 -U username:password --socks5 21.59.126.22:57216 http://proxy.mimvp.com/test_proxy2 # socks5
curl -m 30 --retry 3 -U username:password --socks5 21.59.126.22:57216 https://proxy.mimvp.com/test_proxy2 # socks5
curl -m 30 --retry 3 --proxy-user username:password --socks5 21.59.126.22:57216 http://proxy.mimvp.com/test_proxy2 # socks5
curl -m 30 --retry 3 --proxy-user username:password --socks5 21.59.126.22:57216 https://proxy.mimvp.com/test_proxy2 # socks5
# curl 发送post请求(代理ip需要密码授权)
curl -m 30 --retry 3 -x 'http://af17b545e2b0:65432qzoj2@126.185.148.106:3784' -d "post_data=i_love_mimvp" 'https://proxy.mimvp.com/ip'
curl -m 30 --retry 3 -x 'http://af17b545e2b0:65432qzoj2@126.185.148.106:3784' -H "Content-Type:application/json" -X POST -d '{"post_data":"i_love_mimvp"}' 'https://proxy.mimvp.com/ip'
# wget 不支持socks
################### wget配置文件设置代理 ###################
vim ~/.wgetrc
http_proxy=http://120.77.176.179:8080
https_proxy=http://12.7.17.17:8888
use_proxy = on
wait = 30
# 配置文件设置后,立即生效,直接执行wget爬取命令即可
wget -T 30 --tries 3 http://proxy.mimvp.com/test_proxy2
wget -T 30 --tries 3 https://proxy.mimvp.com/test_proxy2
################### 设置临时局部代理 ###################
# proxy no auth
export http_proxy=http://120.77.176.179:8080
export https_proxy=http://12.7.17.17:8888
# proxy auth(代理需要用户名和密码验证)
export http_proxy=http://username:password@120.77.176.179:8080
export https_proxy=http://username:password@12.7.17.17:8888
# 直接爬取网页
curl -m 30 --retry 3 http://proxy.mimvp.com/test_proxy2 # http_proxy
curl -m 30 --retry 3 https://proxy.mimvp.com/test_proxy2 # https_proxy
wget -T 30 --tries 3 http://proxy.mimvp.com/test_proxy2 # http_proxy
wget -T 30 --tries 3 https://proxy.mimvp.com/test_proxy2 # https_proxy
# 取消设置
unset http_proxy
unset https_proxy
################### 设置系统全局代理 ###################
# 修改 /etc/profile,保存并重启服务器
sudo vim /etc/profile # 所有人有效
或
sudo vim ~/.bashrc # 所有人有效
或
vim ~/.bash_profile # 个人有效
## 在文件末尾,添加如下内容
# proxy no auth
export http_proxy=http://120.77.176.179:8080
export https_proxy=http://12.7.17.17:8888
# proxy auth(代理需要用户名和密码验证)
export http_proxy=http://username:password@120.77.176.179:8080
export https_proxy=http://username:password@12.7.17.17:8888
## 执行source命令,使配置文件生效(临时生效)
source /etc/profile
或
source ~/.bashrc
或
source ~/.bash_profile
## 若需要机器永久生效,则需要重启服务器
sudo reboot
易语言 示例
' 易语言 支持 http
'
' 米扑代理示例:
' https://proxy.mimvp.com/demo
'
' 米扑代理购买:
' https://proxy.mimvp.com
'
' mimvp.com
' 2017-07-28
'
' 易语言的安装与开发,请参考米扑博客:
' https://blog.mimvp.com/article/19268.html
'
' 易语言的网络编程和代理示例,请参考米扑博客,附有源代码:
' https://blog.mimvp.com/article/19303.html
' 变量声明(表格)
.局部变量 代理地址, 文本型
.局部变量 目标网址, 文本型
.局部变量 代理用户名, 文本型
.局部变量 代理密码, 文本型
' 米扑代理的私密和独享代理的授权密码
代理用户名 = “61c9c4fce123”
代理密码 = “45a90pabcd”
' 获取目标网址
.如果 (目标网址编辑框.内容 = “”)
目标网址 = “https://proxy.mimvp.com/ip”
.否则
目标网址 = 目标网址编辑框.内容
' 获取代理地址
.如果 (代理服务器编辑框.内容 = “”)
代理地址 = “211.159.166.225:52223”
.否则
代理地址 = 代理服务器编辑框.内容
.如果结束
输出调试文本 (“代理地址: ” + 代理地址)
' 设置代理方法1
网页_置浏览器代理 (代理地址) ' 通过浏览器设置,一般只支持http代理
超文本浏览框.跳转 (目标网址编辑框.内容, , )
' 设置代理方法2
输出调试文本 (网页_访问S (目标网址, , , , , , , , , , 代理地址, , ))
' 设置代理方式3(用户名 + 密码)
输出调试文本 (网页_访问S (目标网址, , , , , , , , , , 代理地址, 代理用户名, 代理密码))
# 易语言源码:
# 易语言代理设置: