Personio Jobs API.
German HR platform with a public JSON API that returns complete job listings including full HTML descriptions in a single request.
Try the API.
Test Jobs, Feed, and Auto-Apply endpoints against https://connect.jobo.world with live request/response examples, then copy ready-to-use curl commands.
What's in every response.
Data fields, real-world applications, and the companies already running on Personio.
- European coverage
- Multi-language support
- SMB focus
- Full descriptions in API
- Rich metadata
- Multiple office locations
- 1. European job market tracking
- 2. SMB hiring monitoring
- 3. Multi-language job aggregation
- 4. Startup ecosystem scraping
How to scrape Personio.
Step-by-step guide to extracting jobs from Personio-powered career pages—endpoints, authentication, and working code.
import re
def extract_company_info(url: str) -> tuple[str, str] | None:
"""Extract company slug and domain from Personio URL."""
pattern = r"^https?://([^.]+)\.jobs\.personio\.(com|de)"
match = re.match(pattern, url, re.IGNORECASE)
if match:
company = match.group(1)
domain = match.group(2) # "com" or "de"
return company, domain
return None
# Example usage
url = "https://mercanis.jobs.personio.de"
result = extract_company_info(url)
print(result)  # ('mercanis', 'de')

import requests

def fetch_personio_jobs(company: str, domain: str = "de") -> list[dict]:
    """Retrieve every job posting from a company's public search.json endpoint.

    No authentication is required; the endpoint returns the complete listing
    (including full HTML descriptions) in a single request.

    Raises an HTTPError via raise_for_status() on a non-2xx response.
    """
    endpoint = f"https://{company}.jobs.personio.{domain}/search.json"
    resp = requests.get(endpoint, timeout=30)
    resp.raise_for_status()
    jobs = resp.json()
    print(f"Found {len(jobs)} jobs for {company}")
    return jobs

# Example usage
jobs = fetch_personio_jobs("mercanis", "de")
print(f"First job: {jobs[0]['name']}")

def parse_job(job: dict, company: str, domain: str) -> dict:
    """Flatten a raw Personio job object into a stable, structured record.

    `company` and `domain` are used only to rebuild the public job URL;
    every other field comes straight from the job object (missing keys
    become None, except `offices`/`keywords` which default to empty lists).
    """
    raw_keywords = job.get("keywords")
    return {
        "id": job.get("id"),
        "title": job.get("name"),  # Personio calls the job title "name"
        "department": job.get("department"),
        "office": job.get("office"),
        "offices": job.get("offices", []),
        "employment_type": job.get("employment_type"),
        "schedule": job.get("schedule"),  # e.g., "Full-time"
        "seniority": job.get("seniority"),  # e.g., "Entry level"
        "category": job.get("category"),
        "keywords": raw_keywords.split(",") if raw_keywords else [],
        "description_html": job.get("description"),  # Full HTML description
        "subcompany": job.get("subcompany"),
        "url": f"https://{company}.jobs.personio.{domain}/job/{job.get('id')}",
    }

# Parse all jobs
parsed_jobs = [parse_job(job, "mercanis", "de") for job in jobs]
for job in parsed_jobs[:3]:
    print(f"- {job['title']} ({job['department']})")
def fetch_jobs_by_language(company: str, language: str = "en", domain: str = "de") -> list[dict]:
    """Fetch jobs filtered by language.

    Args:
        company: Personio company slug (the jobs.personio.* subdomain).
        language: Two-letter language code, e.g. "en", "de", "fr", "es".
        domain: Job-board TLD, "de" or "com". Previously hardcoded to "de";
            now a keyword parameter for consistency with fetch_personio_jobs,
            defaulting to the old behavior.

    Returns:
        The parsed JSON list of job objects.

    Raises:
        requests.HTTPError: if the endpoint responds with an error status.
    """
    url = f"https://{company}.jobs.personio.{domain}/search.json"
    params = {"language": language}
    response = requests.get(url, params=params, timeout=30)
    response.raise_for_status()
    return response.json()

# Fetch English jobs only
english_jobs = fetch_jobs_by_language("mercanis", "en")
print(f"Found {len(english_jobs)} English jobs")
# Also available: "de" (German), "fr" (French), "es" (Spanish)

import time
import time
import requests
from requests.exceptions import RequestException

def fetch_jobs_safe(company: str, domain: str = "de", retries: int = 3) -> list[dict]:
    """Fetch a company's Personio job list, retrying transient failures.

    Makes up to `retries` attempts with exponential backoff (1s, 2s, 4s, ...)
    between failures; returns an empty list if every attempt fails.
    """
    endpoint = f"https://{company}.jobs.personio.{domain}/search.json"
    for attempt in range(retries):
        try:
            resp = requests.get(endpoint, timeout=30)
            resp.raise_for_status()
            return resp.json()
        except RequestException as e:
            print(f"Attempt {attempt + 1} failed for {company}: {e}")
            # Don't sleep after the final failed attempt.
            if attempt < retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
    print(f"Failed to fetch jobs for {company} after {retries} attempts")
    return []
def scrape_multiple_companies(companies: list[str], delay: float = 1.0) -> dict:
"""Scrape jobs from multiple Personio companies."""
results = {}
for company in companies:
results[company] = fetch_jobs_safe(company)
time.sleep(delay) # Rate limit: ~60 req/min
return resultsVerify the company URL is correct. Try both .com and .de TLDs. Some companies use different subdomains than their company name.
Some jobs may have empty description fields. Check if the field exists and has content before using it. Consider falling back to the individual job page HTML if needed.
The /xml and /search endpoints are blocked in robots.txt but remain publicly accessible. Prefer using /search.json which is more reliable.
Add delays between requests (500ms-1s). Personio doesn't publish official rate limits but may throttle aggressive scraping.
Personio does not provide a sitemap of companies. Use third-party data sources, search engine dorking (site:jobs.personio.de), or DNS enumeration for discovery.
- 1. Use the search.json endpoint for complete job data with full descriptions in a single request
- 2. Handle both .com and .de TLDs when discovering companies
- 3. Add a 500ms-1s delay between requests to avoid rate limiting
- 4. Cache results - job boards typically update daily at most
- 5. Use the language parameter to filter multilingual job boards
- 6. Validate company URLs before scraping using the API endpoint
One endpoint. All Personio jobs. No scraping, no sessions, no maintenance.
Get API access:
curl "https://enterprise.jobo.world/api/jobs?sources=personio" \
  -H "X-Api-Key: YOUR_KEY"
Access Personio job data today.
One API call. Structured data. No scraping infrastructure to build or maintain — start with the free tier and scale as you grow.