Files
higress/plugins/wasm-go/extensions/waf/wasmplugin/rules/crs/crawlers-user-agents.data
2023-06-28 19:25:36 +08:00

83 lines
2.1 KiB
Plaintext

# Search engine crawlers and other bots
# crawler
# https://80legs.com/
80legs
# scraping framework
# https://ache.readthedocs.io/en/latest/
# User-Agent: Mozilla/5.0 (compatible; ACHE/VERSION; +OPERATOR_CONTACT_URL; +OPERATOR_CONTACT_EMAIL)
ACHE/
# SEO
# https://ahrefs.com/robot
AhrefsBot
# site ripper
# http://www.softbytelabs.com/en/BlackWidow/
black widow
blackwidow
# security crawler
# User-Agent: Mozilla/5.0 (compatible; CensysInspect/1.1; +https://about.censys.io/)
CensysInspect
# scraping framework
# http://go-colly.org/
# User-Agent: colly - https://github.com/gocolly/colly/v2
colly -
# scraping framework
# https://github.com/yasserg/crawler4j
# User-Agent: crawler4j (https://github.com/yasserg/crawler4j/)
crawler4j
# SEO
# advertising targeting
# https://www.grapeshot.com/crawler/
grapeFX
GrapeshotCrawler/2.0
# scraping framework
# https://github.com/internetarchive/heritrix3
# User-Agent: Mozilla/5.0 (compatible; heritrix/VERSION +OPERATOR_CONTACT_URL)
heritrix/
# User-Agent: Krzana bot
# https://krzana.com/
Krzana bot
# misbehaving spider
Lingewoud-550-Spyder
# scraping framework
# http://docs.seattlerb.org/mechanize/Mechanize.html
Mechanize
# SEO
# http://www.majestic12.co.uk/projects/dsearch/mj12bot.php
MJ12bot
# scraping framework
# https://nutch.apache.org/
# User-Agent: NutchCVS/VERSION (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
NutchCVS/
# news service
Owlin bot
# people database
# https://pipl.com/bot/
PiplBot
# crawler
# 2006
prowebwalker
# generic crawler
pymills-spider/
# scraping framework
# https://docs.pyspider.org/en/latest/
# User-Agent: pyspider/VERSION (+http://pyspider.org/)
pyspider/
# SEO
# https://moz.com/help/guides/moz-procedures/what-is-rogerbot
rogerbot
# SEO
# http://www.searchmetrics.com/searchmetricsbot/
SearchmetricsBot
# SEO
# https://www.semrush.com/bot/
SemrushBot
# SEO
# User-Agent: Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)
seoscanners.net
# scraping framework
# https://scrapy.org/
# User-Agent: Scrapy/VERSION (+https://scrapy.org)
Scrapy/
# https://www.wappalyzer.com/
Wappalyzer