SuccessFactors Jobs API.
SAP's human experience management suite used by enterprises globally for talent acquisition and HR management.
Try the API.
Test Jobs, Feed, and Auto-Apply endpoints against https://connect.jobo.world with live request/response examples, then copy ready-to-use curl commands.
What's in every response.
Data fields, real-world applications, and the companies already running on SuccessFactors.
- Global enterprise coverage
- Multi-industry
- Structured data
- Compensation info
- Requirements detail
- Multi-locale support
How to scrape SuccessFactors.
Step-by-step guide to extracting jobs from SuccessFactors-powered career pages—endpoints, authentication, and working code.
import requests
from bs4 import BeautifulSoup
import re

# SAP-hosted pattern: https://{company}.jobs.hr.cloud.sap/
# Custom domain pattern: https://careers.{company}.com/
company_url = "https://wlgore.jobs.hr.cloud.sap"

# Fetch homepage to get the CSRF token and session cookie (JSESSIONID).
session = requests.Session()
response = session.get(company_url)
html = response.text

# Extract CSRF token from HTML (usually in script or meta tags).
# The quote characters inside the character classes must be escaped:
# an unescaped single quote here terminates the string literal and is a
# SyntaxError. The pattern accepts either quote style around the value.
csrf_match = re.search(
    r'x-csrf-token["\':\s]+["\']([^"\']+)["\']', html, re.IGNORECASE
)
csrf_token = csrf_match.group(1) if csrf_match else None
print(f"CSRF Token: {csrf_token[:20]}..." if csrf_token else "CSRF token not found")
# Discover company-specific category IDs from the homepage navigation.
# Category links follow the pattern: /go/{category-name}/{categoryId}/
soup = BeautifulSoup(html, 'html.parser')

categories = []
for link in soup.find_all('a', href=True):
    href = link['href']
    # \d+ (not the literal 'd+', which matches runs of the letter 'd')
    # so the numeric category id is actually captured.
    match = re.match(r'/go/([^/]+)/(\d+)', href)
    if match:
        categories.append({
            'name': match.group(1),
            'id': int(match.group(2)),
        })

print(f"Found {len(categories)} categories")
for cat in categories[:5]:
    print(f" - {cat['name']}: {cat['id']}")

# Use first category or a default (some sites accept a null categoryId)
category_id = categories[0]['id'] if categories else None
# POST to the recruiting search endpoint; page numbering starts at 0.
# The endpoint returns listing metadata only — descriptions require
# fetching each job's HTML detail page separately.
api_url = f"{company_url}/services/recruiting/v1/jobs"
headers = {
    "Content-Type": "application/json",
    "x-csrf-token": csrf_token,
}
payload = {
    "locale": "en_US",
    "pageNumber": 0,
    "sortBy": "",
    "keywords": "",
    "location": "",
    "facetFilters": {},
    "brand": "",
    "skills": [],
    "categoryId": category_id,  # Company-specific ID
    "alertId": "",
    "rcmCandidateId": ""
}
response = session.post(api_url, headers=headers, json=payload)
data = response.json()
jobs = data.get("jobSearchResult", [])
print(f"Found {len(jobs)} jobs on page 0")

# Accumulator for the flattened job dicts built in the next step.
parsed_jobs = []
# Flatten the raw API items into simple dicts. Many SuccessFactors
# fields come back as single-element lists; index them defensively —
# `job.get(key, [None])[0]` raises IndexError when the field is present
# but an empty list.
parsed_jobs = []
for item in jobs:
    job = item.get("response", {})

    def _first(key):
        # First element of a list-valued field, or None when the field
        # is absent, None, or an empty list.
        values = job.get(key) or [None]
        return values[0]

    parsed_jobs.append({
        "id": job.get("id"),
        "title": job.get("unifiedStandardTitle"),
        "url_title": job.get("urlTitle"),
        "location": _first("jobLocationShort"),
        "country": _first("jobLocationCountry"),
        "state": _first("jobLocationState"),
        "business_unit": _first("businessUnit_obj"),
        "posted_date": job.get("unifiedStandardStart"),
        "currency": _first("currency"),
        "locales": job.get("supportedLocales", []),
    })

# Clean up location string (remove <br/> tags)
for job in parsed_jobs:
    if job["location"]:
        job["location"] = job["location"].replace("<br/>", "").strip()

print(f"Parsed {len(parsed_jobs)} jobs")
# Guard the sample print: the result set may be empty for some categories.
if parsed_jobs:
    print(f"Sample: {parsed_jobs[0]['title']} - {parsed_jobs[0]['location']}")
import time

def fetch_job_details(session, base_url, job):
    """Fetch full job details from the HTML detail page.

    The listings API returns metadata only, so the description must be
    scraped from the job's HTML page.

    Args:
        session: requests.Session carrying the site cookies.
        base_url: Career-site root, e.g. "https://wlgore.jobs.hr.cloud.sap".
        job: Parsed job dict containing at least 'url_title' and 'id'.

    Returns:
        A copy of ``job`` extended with 'description', 'req_id', and
        'detail_url' ('description'/'req_id' may be None when not found).
    """
    # Build job detail URL: /job/{urlTitle}/{id}-{locale}/
    detail_url = f"{base_url}/job/{job['url_title']}/{job['id']}-en_US"
    response = session.get(detail_url)
    soup = BeautifulSoup(response.text, 'html.parser')

    # Extract job description, trying increasingly generic containers.
    description = None
    desc_elem = soup.find(class_='job-description')
    if not desc_elem:
        desc_elem = soup.find('main')
    if not desc_elem:
        desc_elem = soup.find('article')
    if desc_elem:
        description = desc_elem.get_text(separator=' ', strip=True)

    # Extract the requisition id from text like "Req Id: 12345".
    # Note \s* and \d+: the original patterns had lost their backslashes
    # ('Reqs*Id:' and 'd+') and matched literal 's'/'d' characters.
    req_id = None
    for elem in soup.find_all(string=re.compile(r'Req\s*Id:', re.I)):
        req_id = elem.find_next(string=re.compile(r'\d+'))
        if req_id:
            req_id = re.search(r'\d+', str(req_id)).group()
            break

    return {
        **job,
        "description": description,
        "req_id": req_id,
        "detail_url": detail_url,
    }
# Fetch details for the first job as an example. Guard the index:
# parsed_jobs may be empty (e.g. an empty category), and 'description'
# may come back None, so normalize it before taking len().
if parsed_jobs:
    job_with_details = fetch_job_details(session, company_url, parsed_jobs[0])
    description = job_with_details.get('description') or ''
    print(f"Description length: {len(description)} chars")
def fetch_all_jobs(session, base_url, csrf_token, category_id, locale="en_US"):
    """Fetch all jobs with pagination handling.

    Posts to the recruiting search endpoint page by page until an empty
    result page signals the end, sleeping between requests to stay
    under the site's rate limits.

    Args:
        session: requests.Session with site cookies already established.
        base_url: Career-site root URL.
        csrf_token: Token scraped from the homepage HTML.
        category_id: Company-specific category id (or None).
        locale: Locale code for the listings, e.g. "en_US".

    Returns:
        List of raw job items accumulated across every page.
    """
    api_url = f"{base_url}/services/recruiting/v1/jobs"
    headers = {
        "Content-Type": "application/json",
        "x-csrf-token": csrf_token,
    }
    # Every field except pageNumber is identical on each request, so
    # build the payload once and patch the page in per iteration.
    base_payload = {
        "locale": locale,
        "pageNumber": 0,
        "sortBy": "",
        "keywords": "",
        "location": "",
        "facetFilters": {},
        "brand": "",
        "skills": [],
        "categoryId": category_id,
        "alertId": "",
        "rcmCandidateId": ""
    }

    all_jobs = []
    page_number = 0
    while True:
        payload = dict(base_payload)
        payload["pageNumber"] = page_number
        response = session.post(api_url, headers=headers, json=payload)
        page_jobs = response.json().get("jobSearchResult", [])
        if not page_jobs:
            break
        all_jobs.extend(page_jobs)
        print(f"Page {page_number}: {len(page_jobs)} jobs (total: {len(all_jobs)})")
        page_number += 1
        time.sleep(1)  # Rate limiting: ~60 requests/minute
    return all_jobs
all_jobs = fetch_all_jobs(session, company_url, csrf_token, category_id)
print(f"Total jobs fetched: {len(all_jobs)}")SuccessFactors embeds CSRF tokens in the HTML page, typically in script tags or meta tags. Try multiple regex patterns: x-csrf-token, csrfToken, _csrf, or X-CSRF-Token. Some implementations may use different token names.
The SuccessFactors listings API only returns metadata, not full descriptions. You must fetch individual job detail HTML pages and parse the description from the page content. Plan for N+1 API calls where N is the number of jobs.
Each company has unique category IDs. Parse the homepage navigation to discover categories using the /go/{name}/{id} URL pattern. Some implementations allow categoryId to be omitted or set to null to return all jobs.
JSESSIONID cookies expire after inactivity (typically 15-30 minutes). Implement session refresh logic that re-fetches the homepage when receiving 401/403 errors. Store the timestamp of last token fetch and refresh proactively.
Not all SuccessFactors instances use the standard SAP-hosted pattern. Custom domains may have different URL structures or lack the API entirely. Detect this by checking for /services/recruiting/v1/jobs endpoint availability and fall back to HTML-only scraping.
SuccessFactors may show CAPTCHAs for automated requests. Use realistic User-Agent headers, add 1-2 second delays between requests, and respect rate limits. Consider residential proxies for large-scale scraping operations.
Jobs may only exist in certain locales (en_US, de_DE, ja_JP, zh_CN). The locale parameter affects both API responses and HTML content. Try multiple locales if jobs appear missing, or check the supportedLocales field in the API response.
1. Use the POST API for efficient job discovery, then fetch HTML for descriptions
2. Cache CSRF tokens and session cookies, but refresh before they expire
3. Parse category IDs from homepage navigation rather than hardcoding
4. Add 1-2 second delays between requests to avoid rate limiting
5. Implement fallback to HTML-only scraping for custom domain implementations
6. Store both API metadata and HTML-parsed descriptions for complete data
One endpoint. All SuccessFactors jobs. No scraping, no sessions, no maintenance.
Get API access:
curl "https://enterprise.jobo.world/api/jobs?sources=successfactors" \
  -H "X-Api-Key: YOUR_KEY"
Access SuccessFactors job data today.
One API call. Structured data. No scraping infrastructure to build or maintain — start with the free tier and scale as you grow.