139 lines
3.3 KiB
Bash
Executable File
139 lines
3.3 KiB
Bash
Executable File
#!/usr/bin/env bash
#
# Command-line wrapper around the Cloudflare "browser-rendering" account API.
# Credentials are read from an env-style config file that must define
# CF_API_TOKEN and CF_ACCOUNT_ID.
set -euo pipefail

# Load Cloudflare credentials.
# The path defaults to the historical location; set CF_FETCH_CONFIG to point
# the script at a different file (other users, CI, tests) without editing it.
CONFIG_FILE="${CF_FETCH_CONFIG:-/home/johan/.config/cloudflare.env}"
if [[ ! -f "$CONFIG_FILE" ]]; then
  echo "Error: Config file not found at $CONFIG_FILE" >&2
  exit 1
fi
# The file is executed as shell code in this process, so it must be trusted.
# shellcheck source=/dev/null
source "$CONFIG_FILE"

# ${VAR:-} keeps `set -u` from aborting before the friendly message is shown.
if [[ -z "${CF_API_TOKEN:-}" || -z "${CF_ACCOUNT_ID:-}" ]]; then
  echo "Error: CF_API_TOKEN and CF_ACCOUNT_ID must be set in $CONFIG_FILE" >&2
  exit 1
fi

# Common prefix of every endpoint requested by the cmd_* functions.
BASE_URL="https://api.cloudflare.com/client/v4/accounts/${CF_ACCOUNT_ID}/browser-rendering"
|
|
|
|
# Print the command summary to stderr and terminate the script with status 1.
# Takes no arguments and never returns to the caller.
usage() {
  # One printf call, one argument per output line; everything goes to stderr.
  printf '%s\n' \
    "Usage: cf-fetch.sh <command> <url> [options]" \
    "" \
    "Commands:" \
    " markdown <url> Get page as markdown" \
    " screenshot <url> [output.png] Save screenshot (default: /tmp/screenshot.png)" \
    " scrape <url> \"<selectors>\" Scrape elements by CSS selector" \
    >&2
  exit 1
}
|
|
|
|
# POST a URL to the /markdown endpoint and print the response body.
# Globals:   CF_API_TOKEN (read), BASE_URL (read)
# Arguments: $1 - URL of the page to fetch
# Outputs:   response body on stdout; diagnostics (and the error body) on stderr
# Exits:     status 1 when curl fails or the API answers with HTTP >= 400
cmd_markdown() {
  local url="$1"

  # Escape backslashes first, then double quotes, so the URL cannot terminate
  # the JSON string early or inject extra fields into the request body.
  # Other control characters are passed through unchanged.
  local json_url=${url//\\/\\\\}
  json_url=${json_url//\"/\\\"}

  local tmpfile
  tmpfile=$(mktemp)

  # The body goes to the temp file; only the status code lands on stdout and
  # is captured. -S keeps -s quiet but still prints curl's own error message.
  local http_code
  http_code=$(curl -sS -o "$tmpfile" -w "%{http_code}" \
    -H "Authorization: Bearer ${CF_API_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "{\"url\": \"${json_url}\"}" \
    "${BASE_URL}/markdown") || {
    echo "Error: API request failed" >&2
    rm -f "$tmpfile"
    exit 1
  }

  if [[ "$http_code" -ge 400 ]]; then
    echo "Error: API returned HTTP ${http_code}" >&2
    # Show the API's error payload to help diagnose the failure.
    cat "$tmpfile" >&2
    rm -f "$tmpfile"
    exit 1
  fi

  cat "$tmpfile"
  rm -f "$tmpfile"
}
|
|
|
|
# POST a URL to the /screenshot endpoint and store the response body in a file.
# Globals:   CF_API_TOKEN (read), BASE_URL (read)
# Arguments: $1 - URL of the page to capture
#            $2 - destination file (optional; default /tmp/screenshot.png)
# Outputs:   confirmation message on stdout; diagnostics on stderr
# Exits:     status 1 when curl fails, the API answers with HTTP >= 400, or
#            the destination cannot be written. The destination file is only
#            touched after a successful response, so an existing file survives
#            a failed request.
cmd_screenshot() {
  local url="$1"
  local output="${2:-/tmp/screenshot.png}"

  # Escape backslashes first, then double quotes, so the URL cannot terminate
  # the JSON string early or inject extra fields into the request body.
  local json_url=${url//\\/\\\\}
  json_url=${json_url//\"/\\\"}

  # Download into a scratch file: a failed or rejected request must not
  # clobber (or delete) whatever already lives at the destination path.
  local tmpfile
  tmpfile=$(mktemp)

  # -S keeps -s quiet but still prints curl's own error message on failure.
  local http_code
  http_code=$(curl -sS -o "$tmpfile" -w "%{http_code}" \
    -H "Authorization: Bearer ${CF_API_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "{\"url\": \"${json_url}\"}" \
    "${BASE_URL}/screenshot") || {
    echo "Error: API request failed" >&2
    rm -f "$tmpfile"
    exit 1
  }

  if [[ "$http_code" -ge 400 ]]; then
    echo "Error: API returned HTTP ${http_code}" >&2
    # On errors the body is the API's error payload rather than an image.
    cat "$tmpfile" >&2
    rm -f "$tmpfile"
    exit 1
  fi

  # Copy via redirection instead of mv so a newly created destination gets
  # the usual umask-based permissions rather than the scratch file's mode.
  if ! cat "$tmpfile" >"$output"; then
    echo "Error: could not write ${output}" >&2
    rm -f "$tmpfile"
    exit 1
  fi
  rm -f "$tmpfile"

  echo "Screenshot saved to ${output}"
}
|
|
|
|
# POST a URL plus a list of CSS selectors to the /scrape endpoint and print
# the response body.
# Globals:   CF_API_TOKEN (read), BASE_URL (read)
# Arguments: $1 - URL of the page to scrape
#            $2 - comma-separated CSS selectors (whitespace around each
#                 selector is ignored, empty entries are skipped)
# Outputs:   response body on stdout; diagnostics (and the error body) on stderr
# Exits:     status 1 when no usable selector is given, curl fails, or the
#            API answers with HTTP >= 400
cmd_scrape() {
  local url="$1"
  local selectors="$2"

  # Nothing but whitespace and commas means there is no selector to send.
  if [[ -z "${selectors//[[:space:],]/}" ]]; then
    echo "Error: scrape requires CSS selectors argument" >&2
    exit 1
  fi

  # Escape backslashes first, then double quotes, so values cannot terminate
  # their JSON string early or inject extra fields into the request body.
  local json_url=${url//\\/\\\\}
  json_url=${json_url//\"/\\\"}

  # Build the elements array from the comma-separated selectors.
  local -a raw_selectors=()
  IFS=',' read -ra raw_selectors <<< "$selectors"

  local -a items=()
  local sel
  for sel in "${raw_selectors[@]}"; do
    # Trim with parameter expansion; unlike xargs this leaves quotes and
    # backslashes inside selectors such as a[href="x"] untouched.
    sel="${sel#"${sel%%[![:space:]]*}"}"
    sel="${sel%"${sel##*[![:space:]]}"}"
    [[ -n "$sel" ]] || continue
    sel=${sel//\\/\\\\}
    sel=${sel//\"/\\\"}
    items+=("{\"selector\": \"${sel}\"}")
  done

  # Join the objects with commas; IFS is changed only inside the subshell.
  local joined
  joined=$(IFS=','; printf '%s' "${items[*]}")
  local elements="[${joined}]"

  local tmpfile
  tmpfile=$(mktemp)

  # The body goes to the temp file; only the status code lands on stdout and
  # is captured. -S keeps -s quiet but still prints curl's own error message.
  local http_code
  http_code=$(curl -sS -o "$tmpfile" -w "%{http_code}" \
    -H "Authorization: Bearer ${CF_API_TOKEN}" \
    -H "Content-Type: application/json" \
    -d "{\"url\": \"${json_url}\", \"elements\": ${elements}}" \
    "${BASE_URL}/scrape") || {
    echo "Error: API request failed" >&2
    rm -f "$tmpfile"
    exit 1
  }

  if [[ "$http_code" -ge 400 ]]; then
    echo "Error: API returned HTTP ${http_code}" >&2
    # Show the API's error payload to help diagnose the failure.
    cat "$tmpfile" >&2
    rm -f "$tmpfile"
    exit 1
  fi

  cat "$tmpfile"
  rm -f "$tmpfile"
}
|
|
|
|
# Main: check the argument count, then hand off to the matching cmd_* handler.
# Arguments: $1 - command name, $2 - URL, remaining - command-specific options
main() {
  # A command and a URL are always required.
  (( $# >= 2 )) || usage

  local action="$1"
  local target="$2"
  shift 2

  case "$action" in
    markdown)
      cmd_markdown "$target"
      ;;
    screenshot)
      # The output path is optional; an empty value selects the default.
      cmd_screenshot "$target" "${1:-}"
      ;;
    scrape)
      if (( $# < 1 )); then
        echo "Error: scrape requires CSS selectors argument" >&2
        usage
      fi
      cmd_scrape "$target" "$1"
      ;;
    *)
      echo "Error: Unknown command '${action}'" >&2
      usage
      ;;
  esac
}

main "$@"
|