Smart AI Proxy · Crawlbase 文档

端点

HTTPS：smartproxy.crawlbase.com:8013

HTTP：smartproxy.crawlbase.com:8012

建议优先使用端口 8013 上的 HTTPS 代理。端口 8012 上的 HTTP 代理供仅支持通过 HTTP 与上游代理通信的客户端使用。
使用您的 token 作为用户名进行身份验证；密码留空。
两个端口均可用于任何目标 URL，HTTP 或 HTTPS 均可。

快速开始

将 Smart AI Proxy 设置为您 HTTP 客户端中的代理。这就是全部配置。

禁用 TLS 验证

Smart AI Proxy 会拦截 TLS 连接以添加代理头。您的客户端将看到 Crawlbase 的证书而非目标站点的证书，因此请设置 verify=False / InsecureSkipVerify: true / 同等选项。从 Crawlbase 到目标站点的连接仍然会被验证。

# Send one GET through the HTTPS proxy; the token is the proxy username and the password is empty.
# --insecure is required because the client sees Crawlbase's certificate instead of the target's.
curl --insecure \
     --proxy 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     'https://httpbin.org/ip'
import requests

# Both plain-HTTP and HTTPS targets go through the same HTTPS proxy endpoint.
# The token is the proxy username; the password after the colon is empty.
PROXY_URL = 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013'
proxy_map = {'http': PROXY_URL, 'https': PROXY_URL}

# verify=False: the proxy presents Crawlbase's certificate to the client.
response = requests.get(
    'https://httpbin.org/ip',
    proxies=proxy_map,
    verify=False,
)
print(response.text)
const https = require('node:https');
const { HttpsProxyAgent } = require('https-proxy-agent');

// The token is the proxy username; the password after the colon is empty.
const agent = new HttpsProxyAgent(
  'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013'
);

// Node's built-in fetch() ignores the `agent` option, so the core https client
// is used to make sure the request really goes through the proxy.
// rejectUnauthorized: false is required because the proxy presents Crawlbase's
// certificate instead of the target site's.
https
  .get('https://httpbin.org/ip', { agent, rejectUnauthorized: false }, (res) => {
    let body = '';
    res.setEncoding('utf8');
    res.on('data', (chunk) => {
      body += chunk;
    });
    res.on('end', () => {
      console.log(body);
    });
  })
  .on('error', (err) => {
    console.error('Request failed:', err.message);
    process.exitCode = 1;
  });
require 'net/http'

# Fetch https://httpbin.org/ip through Smart AI Proxy.
# The token is the proxy username; the proxy password is an empty string.
target = URI('https://httpbin.org/ip')
proxied_http = Net::HTTP::Proxy('smartproxy.crawlbase.com', 8013, 'YOUR_TOKEN', '')
connection = proxied_http.new(target.host, target.port)
connection.use_ssl = true
# The proxy presents Crawlbase's certificate, so peer verification is disabled.
connection.verify_mode = OpenSSL::SSL::VERIFY_NONE
response = connection.get(target.request_uri)
puts response.body
package main

import (
    "crypto/tls"
    "fmt"
    "io"
    "log"
    "net/http"
    "net/url"
)

// main sends a GET to https://httpbin.org/ip through Smart AI Proxy and
// prints the response body. Any failure is reported and ends the program
// instead of being silently ignored.
func main() {
    // The token is the proxy username; the password after the colon is empty.
    proxyURL, err := url.Parse("https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013")
    if err != nil {
        log.Fatalf("parse proxy URL: %v", err)
    }
    client := &http.Client{Transport: &http.Transport{
        Proxy: http.ProxyURL(proxyURL),
        // The proxy presents Crawlbase's certificate, so verification is skipped.
        TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
    }}

    res, err := client.Get("https://httpbin.org/ip")
    if err != nil {
        // res is nil when err is non-nil; using it would panic.
        log.Fatalf("request failed: %v", err)
    }
    defer res.Body.Close()

    body, err := io.ReadAll(res.Body)
    if err != nil {
        log.Fatalf("read response body: %v", err)
    }
    fmt.Println(string(body))
}

POST 请求

Smart AI Proxy 像转发任何其他 HTTP 方法一样将 POST 请求转发到目标。在客户端上设置代理，然后像往常一样 POST - 代理会保留您的方法、头和 body。下面的示例涵盖了大多数客户端使用的两种 body 形式：表单编码和 JSON。

表单编码 body

# HTTPS proxy on :8013 (use http:// + :8012 for HTTP-only clients)
# --data sends an application/x-www-form-urlencoded body; -F would send
# multipart/form-data and contradict the Content-Type header.
curl -X POST \
     -H 'Content-Type: application/x-www-form-urlencoded' \
     --data 'param=value' \
     -x 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     -k 'https://httpbin.org/anything'

import requests
from urllib3.exceptions import InsecureRequestWarning

# verify=False triggers an InsecureRequestWarning on every call; silence it.
requests.packages.urllib3.disable_warnings(category=InsecureRequestWarning)

# The token is the proxy username; the password after the colon is empty.
PROXY_URL = 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013'
proxy_map = {'http': PROXY_URL, 'https': PROXY_URL}

# Passing a dict as `data` makes requests send a form-encoded body.
form_fields = {'param': 'value'}
response = requests.post(
    'https://httpbin.org/anything',
    data=form_fields,
    proxies=proxy_map,
    verify=False,
)
print(response.status_code, response.text)

const https = require('node:https');
const querystring = require('querystring');
const { HttpsProxyAgent } = require('https-proxy-agent');

// The token is the proxy username; the password after the colon is empty.
const agent = new HttpsProxyAgent(
  'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013'
);
const payload = querystring.stringify({ param: 'value' });

// Node's built-in fetch() ignores the `agent` option, so the core https client
// is used to make sure the POST really goes through the proxy.
const req = https.request(
  'https://httpbin.org/anything',
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/x-www-form-urlencoded',
      'Content-Length': Buffer.byteLength(payload),
    },
    agent,
    // The proxy presents Crawlbase's certificate instead of the target's.
    rejectUnauthorized: false,
  },
  (res) => {
    let body = '';
    res.setEncoding('utf8');
    res.on('data', (chunk) => {
      body += chunk;
    });
    res.on('end', () => {
      console.log(res.statusCode, body);
    });
  }
);

req.on('error', (err) => {
  console.error('Request failed:', err.message);
  process.exitCode = 1;
});
req.end(payload);

require 'net/http'
require 'openssl'
require 'uri'

# POST a form-encoded body to httpbin through Smart AI Proxy.
# The token is the proxy username; the proxy password is an empty string.
endpoint = URI('https://httpbin.org/anything')
proxied_http = Net::HTTP::Proxy('smartproxy.crawlbase.com', 8013, 'YOUR_TOKEN', '')
connection = proxied_http.new(endpoint.host, endpoint.port)
connection.use_ssl = true
# The proxy presents Crawlbase's certificate, so peer verification is disabled.
connection.verify_mode = OpenSSL::SSL::VERIFY_NONE

form_post = Net::HTTP::Post.new(endpoint.request_uri)
form_post.set_form_data({ 'param' => 'value' })
response = connection.request(form_post)
# Status code first, then the body, each on its own line.
puts response.code
puts response.body

package main

import (
    "crypto/tls"
    "fmt"
    "io"
    "log"
    "net/http"
    "net/url"
    "strings"
)

// main POSTs a form-encoded body to https://httpbin.org/anything through
// Smart AI Proxy and prints the response status and body. Any failure is
// reported and ends the program instead of being silently ignored.
func main() {
    // The token is the proxy username; the password after the colon is empty.
    proxyURL, err := url.Parse("https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013")
    if err != nil {
        log.Fatalf("parse proxy URL: %v", err)
    }
    client := &http.Client{Transport: &http.Transport{
        Proxy: http.ProxyURL(proxyURL),
        // The proxy presents Crawlbase's certificate, so verification is skipped.
        TLSClientConfig: &tls.Config{InsecureSkipVerify: true},
    }}

    data := url.Values{}
    data.Set("param", "value")
    req, err := http.NewRequest("POST",
        "https://httpbin.org/anything",
        strings.NewReader(data.Encode()))
    if err != nil {
        log.Fatalf("build request: %v", err)
    }
    req.Header.Set("Content-Type", "application/x-www-form-urlencoded")

    res, err := client.Do(req)
    if err != nil {
        // res is nil when err is non-nil; deferring res.Body.Close() would panic.
        log.Fatalf("request failed: %v", err)
    }
    defer res.Body.Close()

    body, err := io.ReadAll(res.Body)
    if err != nil {
        log.Fatalf("read response body: %v", err)
    }
    fmt.Println(res.Status, string(body))
}

JSON body

# POST a JSON body through the HTTPS proxy; the token is the proxy username, the password is empty.
curl -X POST \
     -H 'Content-Type: application/json' \
     --data '{"key1":"value1","key2":"value2"}' \
     -x 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     -k 'https://httpbin.org/anything'

import requests

# The token is the proxy username; the password after the colon is empty.
PROXY_URL = 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013'
proxy_map = {'http': PROXY_URL, 'https': PROXY_URL}

# Passing `json=` makes requests serialize the dict and send it as a JSON body.
json_payload = {'key1': 'value1', 'key2': 'value2'}
response = requests.post(
    'https://httpbin.org/anything',
    json=json_payload,
    proxies=proxy_map,
    verify=False,
)
print(response.status_code, response.text)

const https = require('node:https');
const { HttpsProxyAgent } = require('https-proxy-agent');

// The token is the proxy username; the password after the colon is empty.
const agent = new HttpsProxyAgent(
  'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013'
);
const payload = JSON.stringify({ key1: 'value1', key2: 'value2' });

// Node's built-in fetch() ignores the `agent` option, so the core https client
// is used to make sure the POST really goes through the proxy.
const req = https.request(
  'https://httpbin.org/anything',
  {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Content-Length': Buffer.byteLength(payload),
    },
    agent,
    // The proxy presents Crawlbase's certificate instead of the target's.
    rejectUnauthorized: false,
  },
  (res) => {
    let body = '';
    res.setEncoding('utf8');
    res.on('data', (chunk) => {
      body += chunk;
    });
    res.on('end', () => {
      console.log(res.statusCode, body);
    });
  }
);

req.on('error', (err) => {
  console.error('Request failed:', err.message);
  process.exitCode = 1;
});
req.end(payload);

require 'net/http'
require 'json'
require 'openssl'
require 'uri'

# POST a JSON body to httpbin through Smart AI Proxy.
# The token is the proxy username; the proxy password is an empty string.
endpoint = URI('https://httpbin.org/anything')
proxied_http = Net::HTTP::Proxy('smartproxy.crawlbase.com', 8013, 'YOUR_TOKEN', '')
connection = proxied_http.new(endpoint.host, endpoint.port)
connection.use_ssl = true
# The proxy presents Crawlbase's certificate, so peer verification is disabled.
connection.verify_mode = OpenSSL::SSL::VERIFY_NONE

json_post = Net::HTTP::Post.new(endpoint.request_uri,
                                'Content-Type' => 'application/json')
payload = { key1: 'value1', key2: 'value2' }
json_post.body = payload.to_json
response = connection.request(json_post)
puts response.body

转发头和 cookies

Smart AI Proxy 将您出站请求上的大多数头和 cookies 透传到目标，因此现有客户端无需修改即可继续工作。有两个值得注意的行为：

您的 User-Agent 会按原样转发。发送一个空白的，代理会为您轮换一个真实的 UA。
逐跳和代理控制头（Host、Proxy-Authorization）会被剥离 - 它们描述的是代理本身，而不是被转发的请求。

# Custom headers and cookies set on the outgoing request are passed through to the target.
curl -H 'Accept-Language: en-US,en;q=0.9' \
     -H 'X-Custom-Header: My-Custom-Value' \
     -H 'User-Agent: MyCustomBrowser/1.0' \
     --cookie 'sid=abc123; cart=xyz789' \
     -x 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     -k 'https://httpbin.org/anything'

上面的示例到达目标时，所有三个自定义头和两个 cookies 都完好无损。要覆盖代理行为（国家、设备、会话、JS 渲染、scrapers 等），请改用 CrawlbaseAPI-* 头 - 这些头由代理解释，永远不会到达目标。

无头浏览器渲染

Smart AI Proxy 由与 Crawling API 相同的无头浏览器集群提供支持。要执行 JavaScript、抓取客户端渲染的 SPA，或应用需要真实浏览器的 Crawling API 功能（截图、滚动、点击选择器、autoparse），请在您的出站请求上传递 CrawlbaseAPI-Parameters: javascript=true 作为头。

# Render with a headless browser, force a 2s wait, scroll to load lazy content
curl -H 'CrawlbaseAPI-Parameters: javascript=true&page_wait=2000&scroll=true' \
     -x 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     -k 'https://spa.example.com/feed'

使用您的 Normal token（来自仪表板的 Smart AI Proxy token）进行身份验证，而不是 JavaScript token - Smart AI Proxy 会拒绝 JavaScript token 并返回 401 "Your private token is required!"。通过 Smart AI Proxy 进行 JavaScript 渲染仅在 Premium 套餐中可用。完整的浏览器层参数集（page_wait、scroll、css_click_selector、wait_for、截图）可通过 CrawlbaseAPI-Parameters 访问；规范列表请参阅 JavaScript 参数参考。

何时使用 Smart AI Proxy 与 Crawling API

Smart AI Proxy 和 Crawling API 运行在同一网络上，并提供相同的功能面 - JS 渲染、反机器人绕过、国家路由、设备模拟、会话、scrapers、async + 存储，全部都有。在它们之间做选择不是关于能力的问题，而是关于接口形式、您持有的订阅以及该订阅提供的并发层级。

选择 Smart AI Proxy 当……	选择 Crawling API (REST) 当……
您无法更改客户端代码（第三方工具、浏览器扩展、Scrapy、现有的爬虫）	您从零开始构建，希望对每个请求进行明确的控制
您宁愿一次性配置代理，也不愿将每个请求重写到新的端点	您希望以纯粹的 GET 形式查看 URL 和参数，便于日志记录 / 调试
您的订阅是 Smart AI Proxy 计划，具有自己的线程 / 并发等级	您的订阅是 Crawling API 计划，拥有独立的月度配额和并发预算
您希望在现有管道前置 Crawlbase，无需任何代码更改	您希望由其中一个 SDK 为您处理重试、async 轮询和响应解析

所有 Crawling API 参数都可以通过 CrawlbaseAPI-Parameters 头从 Smart AI Proxy 访问（见下文）。能力面是相同的 - 选择适合您订阅和集成形式的方式即可。

控制头

在您发出的请求中传递以 CrawlbaseAPI- 为前缀的自定义头，以控制代理行为。下面三个单一用途的头是便捷快捷方式；完整的 Crawling API 参数集可通过 CrawlbaseAPI-Parameters 访问（在表格之后说明）。

CrawlbaseAPI-Country

ISO 3166 · 可选

强制指定国家：US、GB、DE 等。

CrawlbaseAPI-Device

desktop | mobile · 默认值：desktop

模拟设备类别。

CrawlbaseAPI-Session-Id

string · 可选

将会话固定到同一出口 IP。对于需要稳定身份的多步流程很有用。会话存活约 30 分钟。

CrawlbaseAPI-Parameters

query string · 可选

完整的 Crawling API 参数集作为单一的以 & 连接的字符串传递。任何您会附加到 REST 请求的内容（javascript=true、page_wait=2000、scroll=true、store=true、scraper=amazon-product-details、autoparse=true）都可以在这里使用。使用 & 组合多个参数，例如 "javascript=true&country=US&store=true"。

使用 CrawlbaseAPI-Parameters

上面的单一用途头（Country、Device、Session-Id）是最常用控制的快捷方式。Crawling API 参数集中的任何其他内容 - JS 渲染、scroll、click selectors、scrapers、async + webhooks + 存储、get_cookies、get_headers - 都可以通过 CrawlbaseAPI-Parameters 头访问。格式与您附加到 REST 调用的查询字符串相同：

# JS-rendered SPA, store the result, force US geo
# All Crawling API parameters travel in one CrawlbaseAPI-Parameters header, joined with &.
curl --insecure \
     --proxy 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     --header 'CrawlbaseAPI-Parameters: javascript=true&country=US&store=true&page_wait=2000' \
     'https://spa.example.com/feed'

# Apply a scraper - same as &scraper=… on the REST endpoint
curl --insecure \
     --proxy 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     --header 'CrawlbaseAPI-Parameters: scraper=amazon-product-details' \
     'https://www.amazon.com/dp/B0CHX2XFLN'

冲突解决：如果您同时传递了单一用途头（例如 CrawlbaseAPI-Country: GB）和 CrawlbaseAPI-Parameters 中的同一字段，则单一用途头优先。每个请求选择一种风格，以保持行为可预测。

# Pin to a US session for a multi-step checkout flow
# Reusing the same CrawlbaseAPI-Session-Id keeps requests on the same exit IP.
curl --insecure \
     --proxy 'https://YOUR_TOKEN:@smartproxy.crawlbase.com:8013' \
     --header 'CrawlbaseAPI-Country: US' \
     --header 'CrawlbaseAPI-Session-Id: checkout-user-42' \
     'https://shop.example.com/cart'

错误

Smart AI Proxy 返回标准的 HTTP 响应。状态码遵循与 Crawling API 相同的模型。鉴权错误（401、402）由代理本身返回；站点错误（404、500 等）来自目标。