Crawling
Start website crawl
Queues a new crawl job and returns a scanId. The crawl runs asynchronously.
# Queue a new crawl job (runs asynchronously; returns a scanId).
curl --request POST "https://api.neostra.io/api/v1/crawl/start" \
  --header "Content-Type: application/json" \
  --header "Authorization: Bearer YOUR_API_TOKEN (JWT)" \
  --data '{
"tenantId": "123e4567-e89b-12d3-a456-426614174000",
"website": "example_string",
"urlFilterRegexInclusive": "example_string",
"urlFilterRegexExclusive": "example_string",
"maxPages": 25
}'
import requests
import json

# Queue a new crawl job; the crawl itself runs asynchronously and the
# response carries the scanId to poll for results.
url = "https://api.neostra.io/api/v1/crawl/start"
headers = {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_API_TOKEN (JWT)"
}
data = {
    "tenantId": "123e4567-e89b-12d3-a456-426614174000",
    "website": "example_string",
    "urlFilterRegexInclusive": "example_string",
    "urlFilterRegexExclusive": "example_string",
    "maxPages": 25
}

# requests has no default timeout -- without one, a stalled server hangs forever.
response = requests.post(url, headers=headers, json=data, timeout=30)
response.raise_for_status()  # fail loudly on HTTP 4xx/5xx instead of printing an error body
print(response.json())
// Queue a new crawl job; the crawl runs asynchronously and returns a scanId.
const response = await fetch("https://api.neostra.io/api/v1/crawl/start", {
  method: "POST",
  headers: {
    "Content-Type": "application/json",
    "Authorization": "Bearer YOUR_API_TOKEN (JWT)"
  },
  body: JSON.stringify({
    "tenantId": "123e4567-e89b-12d3-a456-426614174000",
    "website": "example_string",
    "urlFilterRegexInclusive": "example_string",
    "urlFilterRegexExclusive": "example_string",
    "maxPages": 25
  })
});

// fetch() only rejects on network failure -- HTTP errors (4xx/5xx) resolve
// normally and must be checked explicitly before parsing the body as success.
if (!response.ok) {
  throw new Error(`Request failed: ${response.status} ${response.statusText}`);
}

const data = await response.json();
console.log(data);
package main

import (
	"bytes"
	"fmt"
	"io"
	"net/http"
	"time"
)

// main queues a new crawl job via POST /api/v1/crawl/start and prints the
// response. The crawl runs asynchronously; the response body carries a scanId.
//
// NOTE: the original example imported "encoding/json" without using it, which
// is a compile error in Go -- the snippet could never build.
func main() {
	payload := []byte(`{
"tenantId": "123e4567-e89b-12d3-a456-426614174000",
"website": "example_string",
"urlFilterRegexInclusive": "example_string",
"urlFilterRegexExclusive": "example_string",
"maxPages": 25
}`)

	req, err := http.NewRequest(http.MethodPost, "https://api.neostra.io/api/v1/crawl/start", bytes.NewReader(payload))
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Type", "application/json")
	req.Header.Set("Authorization", "Bearer YOUR_API_TOKEN (JWT)")

	// Never use a client without a timeout: a stalled server would otherwise
	// block this program forever.
	client := &http.Client{Timeout: 30 * time.Second}
	resp, err := client.Do(req)
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	// Read the body so the scanId is actually shown (and so the transport can
	// reuse the connection).
	body, err := io.ReadAll(resp.Body)
	if err != nil {
		panic(err)
	}
	fmt.Println("Response Status:", resp.Status)
	fmt.Println(string(body))
}
require 'net/http'
require 'json'

# Queue a new crawl job; the crawl runs asynchronously and the response
# body carries the scanId.
endpoint = URI('https://api.neostra.io/api/v1/crawl/start')

post = Net::HTTP::Post.new(endpoint)
post['Content-Type'] = 'application/json'
post['Authorization'] = 'Bearer YOUR_API_TOKEN (JWT)'
post.body = '{
"tenantId": "123e4567-e89b-12d3-a456-426614174000",
"website": "example_string",
"urlFilterRegexInclusive": "example_string",
"urlFilterRegexExclusive": "example_string",
"maxPages": 25
}'

reply = Net::HTTP.start(endpoint.host, endpoint.port, use_ssl: true) do |conn|
  conn.request(post)
end
puts reply.body
{
"success": true,
"message": "example_string",
"data": {
"scanId": "123e4567-e89b-12d3-a456-426614174000"
}
}
{
"error": "Bad Request",
"message": "The request contains invalid parameters or malformed data",
"code": 400,
"details": [
{
"field": "email",
"message": "Invalid email format"
}
]
}
{
"error": "Unauthorized",
"message": "Authentication required. Please provide a valid API token",
"code": 401
}
POST
/api/v1/crawl/start
POST
Bearer Token (JWT)
Bearer Token (string)
Required. Bearer token (JWT) — just enter the token; the "Bearer" prefix will be added automatically
Content-Type (string)
Required. The media type of the request body
Options: application/json
tenantId (string)
Required. Format: uuid
website (string)
Required. Root URL to crawl
Format: uri
urlFilterRegexInclusive (string)
Regex — only URLs matching this pattern will be crawled
urlFilterRegexExclusive (string)
Regex — URLs matching this pattern will be skipped
maxPages (integer)
Maximum number of pages to crawl
Request Preview
Response
Response will appear here after sending the request
Authentication
header
Authorization (string)
Required. Bearer token (JWT). Authentication token required.
Body
application/json
website (string)
Required. Root URL to crawl
urlFilterRegexInclusive (string)
Regex — only URLs matching this pattern will be crawled
urlFilterRegexExclusive (string)
Regex — URLs matching this pattern will be skipped
maxPages (integer)
Maximum number of pages to crawl
Responses
Was this page helpful?
Last updated 1 week ago
Built with Documentation.AI