mirror of
https://github.com/alibaba/higress.git
synced 2026-05-27 06:07:27 +08:00
Go WAF Plugin (#400)
This commit is contained in:
@@ -0,0 +1,82 @@
|
||||
# Search engine crawlers and other bots
|
||||
# crawler
|
||||
# https://80legs.com/
|
||||
80legs
|
||||
# scraping framework
|
||||
# https://ache.readthedocs.io/en/latest/
|
||||
# User-Agent: Mozilla/5.0 (compatible; ACHE/VERSION; +OPERATOR_CONTACT_URL; +OPERATOR_CONTACT_EMAIL)
|
||||
ACHE/
|
||||
# SEO
|
||||
# https://ahrefs.com/robot
|
||||
AhrefsBot
|
||||
# site ripper
|
||||
# http://www.softbytelabs.com/en/BlackWidow/
|
||||
black widow
|
||||
blackwidow
|
||||
# security crawler
|
||||
# User-Agent: Censys: Mozilla/5.0 (compatible; CensysInspect/1.1; +https://about.censys.io/)
|
||||
CensysInspect
|
||||
# scraping framework
|
||||
# http://go-colly.org/
|
||||
# User-Agent: colly - https://github.com/gocolly/colly/v2
|
||||
colly -
|
||||
# scraping framework
|
||||
# https://github.com/yasserg/crawler4j
|
||||
# User-Agent: crawler4j (https://github.com/yasserg/crawler4j/)
|
||||
crawler4j
|
||||
# SEO
|
||||
# advertising targeting
|
||||
# https://www.grapeshot.com/crawler/
|
||||
grapeFX
|
||||
GrapeshotCrawler/2.0
|
||||
# scraping framework
|
||||
# https://github.com/internetarchive/heritrix3
|
||||
# User-Agent: Mozilla/5.0 (compatible; heritrix/VERSION +OPERATOR_CONTACT_URL)
|
||||
heritrix/
|
||||
# User-Agent: Krzana bot
|
||||
# https://krzana.com/
|
||||
Krzana bot
|
||||
# misbehaving spider
|
||||
Lingewoud-550-Spyder
|
||||
# scraping framework
|
||||
# http://docs.seattlerb.org/mechanize/Mechanize.html
|
||||
Mechanize
|
||||
# SEO
|
||||
# http://www.majestic12.co.uk/projects/dsearch/mj12bot.php
|
||||
MJ12bot
|
||||
# scraping framework
|
||||
# https://nutch.apache.org/
|
||||
# User-Agent: NutchCVS/VERSION (Nutch; http://lucene.apache.org/nutch/bot.html; nutch-agent@lucene.apache.org)
|
||||
NutchCVS/
|
||||
# news service
|
||||
Owlin bot
|
||||
# people database
|
||||
# https://pipl.com/bot/
|
||||
PiplBot
|
||||
# crawler
|
||||
# 2006
|
||||
prowebwalker
|
||||
# generic crawler
|
||||
pymills-spider/
|
||||
# scraping framework
|
||||
# https://docs.pyspider.org/en/latest/
|
||||
# User-Agent: pyspider/VERSION (+http://pyspider.org/)
|
||||
pyspider/
|
||||
# SEO
|
||||
# https://moz.com/help/guides/moz-procedures/what-is-rogerbot
|
||||
rogerbot
|
||||
# SEO
|
||||
# http://www.searchmetrics.com/searchmetricsbot/
|
||||
SearchmetricsBot
|
||||
# SEO
|
||||
# https://www.semrush.com/bot/
|
||||
SemrushBot
|
||||
# SEO
|
||||
# User-Agent: Mozilla/5.0 (compatible; seoscanners.net/1; +spider@seoscanners.net)
|
||||
seoscanners.net
|
||||
# scraping framework
|
||||
# https://scrapy.org/
|
||||
# User-Agent: Scrapy/VERSION (+https://scrapy.org)
|
||||
Scrapy/
|
||||
# https://www.wappalyzer.com/
|
||||
Wappalyzer
|
||||
Reference in New Issue
Block a user