# Allow all bots to crawl the entire site by default
User-agent: *
Allow: /

# Disallow crawling of error pages (.shtml) and SSI/search paths
Disallow: /403.shtml
Disallow: /404.shtml
Disallow: /429.shtml
Disallow: /50x.shtml
Disallow: /search.shtml
Disallow: /ssi/

# Disallow common sensitive or utility directories
Disallow: /cgi-bin/
Disallow: /tmp/
Disallow: /cache/
Disallow: /backup/
Disallow: /backups/
Disallow: /db_backup/
Disallow: /drafts/

# Block URL parameters that may cause duplicate content
# (wildcard patterns; supported by major crawlers such as Googlebot and Bingbot)
Disallow: /*?*utm_*
Disallow: /*?*session*

# Block AI training and large-scale data-collection crawlers entirely
User-agent: Amazonbot
Disallow: /

User-agent: Applebot-Extended
Disallow: /

User-agent: Bytespider
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ClaudeBot
Disallow: /

User-agent: Google-Extended
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: meta-externalagent
Disallow: /

# Rules for specific user agents
# Note: a crawler that matches a named group ignores the * group above,
# so these groups leave Googlebot, AdsBot-Google, and Bingbot otherwise
# unrestricted; repeat the global Disallow rules here if they should
# also apply to these crawlers.
User-agent: Googlebot
Allow: /ads.txt

User-agent: AdsBot-Google
Allow: /ads.txt

User-agent: Bingbot
Allow: /ads.txt

# Sitemap location
Sitemap: https://myhocdaicuong.com/sitemap.xml