# robots.txt for http://www.ai.sri.com/ # See http://www.robotstxt.org/wc/norobots.html for info # IMPORTANT: Test this file via the URL # http://www.dcs.ed.ac.uk/cgi/sxw/parserobots.pl?site=http%3A%2F%2Fwww.ai.sri.com%2Frobots.txt # First we set the robot restrictions for the SRI web crawler. This # makes sure that we only populate the autogenerated, professional # pages in the SRI search index to improve the results of that engine # # This set should include all the ones in the * set as well. (ie, duplicate lines # so they appear in both) # WMT 7/17/04... Someone obviously changed this before 12/11/2002. # This now blocks all crawlers. # WARNING: Blank lines ARE significant in this file. # NOTE: This is too confusing... # Once we had it block AIC's and (separately) SRI's crawler differently than the world but now it doesn't really. # Part of the rationalization was to make the private home directories (/~user/) not be in SRI's search engine. # Before 2/20/06 /pubs/ was blocked to the world, but Tyson can't understand why, so he unblocked it. # NOTE: The docs indicate this should be after the "*" section. Experiments show otherwise. # NOTE: The user-agent is actually "Action@AI.SRI.COM swish-e spider" but it doesn't match! So just use a substring. 
# The AIC search engine
## YC-2006-10-02: SRI changed I guess to another crawler using google
#
# Record 1: rules that apply only to the crawler identifying itself as "gsa-crawler".
# Each directive must sit on its own line; a blank line ends the record.
User-agent: gsa-crawler
#Disallow: /mailing-lists/ # Don't let it search the mail archives
#Disallow: /~listar/ # Don't let it search the mail archives
Disallow: /cgi-bin/ # Executables
Disallow: /Harvest/ # Ancient stuff
#Disallow: /pubs/
Disallow: /~magic/TileSets/ # Big binary files
Disallow: /~magic/tmp/ # Big binary files
Disallow: /TileSets/ # Big binary files
#Disallow: /geovrml/archive/ # Mail list
Disallow: /aic-private/
Disallow: /~oaa/sri-private/
Disallow: /oaa/sri-private/
Disallow: /~daml/sri-private/
Disallow: /daml/sri-private/
Disallow: /~rkf/private/
Disallow: /~communic/private/
#Disallow: /perception/projects/radius/testbed/user-manual/
Disallow: /php/
Disallow: /norobot/ # This is to test to see if robots/spiders are ignoring this robots.txt
Disallow: /norobot.html # Another test.
# Above here are globally blocked. Below here are blocked only for SRI's index
Disallow: /autogen/staff_email_list.php
Disallow: /~agno/
# NOTE(review): "/ajh/" is the only entry in this list without a "~"; confirm whether "/~ajh/" was intended.
Disallow: /ajh/
Disallow: /~appelt/
Disallow: /~bletter/
Disallow: /~eriksen/
Disallow: /~gerdin/
Disallow: /~hobbs/
Disallow: /~israel/
Disallow: /~leei/
Disallow: /~luong/
Disallow: /~myers/
Disallow: /~ortiz/
Disallow: /~wilkins/
Disallow: /~bolles/
Disallow: /~boyce/
Disallow: /~burns/
Disallow: /~chaudhri/
Disallow: /~sakata/
Disallow: /~connolly/
Disallow: /~culy/
Disallow: /~fischler/
Disallow: /~garvey/
Disallow: /~harrison/
Disallow: /~heller/
Disallow: /~hsu/
Disallow: /~jarvis/
Disallow: /~jenkins/
Disallow: /~johnston/
Disallow: /~pkarp/
Disallow: /~karp/
Disallow: /~konolige/
Disallow: /~leclerc/
Disallow: /~tomlee/
Disallow: /~lee/
Disallow: /~lenz/
Disallow: /~lowrance/
Disallow: /~martin/
Disallow: /~morley/
Disallow: /~narayana/
Disallow: /~paley/
Disallow: /~perrault/
Disallow: /~pick/
Disallow: /~acr/
Disallow: /~promero/
Disallow: /~reddy/
Disallow: /~ruspini/
Disallow: /~sheleg/
Disallow: /~thomere/
Disallow: /~valdez/
Disallow: /~vincent/
Disallow: /~waldinger/
Disallow: /~mjw/
# WARNING: If you add any here, add them in all more specific blocks of user agents.

# Record 2: rules for every crawler that is not matched by a more specific record.
# If this line is changed from "User-agent: *", uncomment the User-agent line below.
User-agent: *
# NOTE(review): Crawl-delay is a non-standard extension; crawlers that do not support it ignore it.
Crawl-delay: 10
Disallow: /mailing-lists/ # Don't let it search the mail archives
Disallow: /~listar/ # Don't let it search the mail archives
Disallow: /cgi-bin/
Disallow: /Harvest/
#Disallow: /pubs/
Disallow: /~magic/TileSets/
Disallow: /~magic/tmp/
Disallow: /TileSets/
Disallow: /geovrml/archive/
Disallow: /~oaa/sri-private/
Disallow: /oaa/sri-private/
Disallow: /perception/projects/radius/testbed/user-manual/
Disallow: /php/
Disallow: /norobot/ # This is to test to see if robots/spiders are ignoring this robots.txt
Disallow: /norobot.html # Another test.
# Above here are globally blocked. Below here are blocked only for SRI's index
Disallow: /autogen/staff_email_list.php
Disallow: /~agno/
# NOTE(review): "/ajh/" is the only entry in this list without a "~"; confirm whether "/~ajh/" was intended.
Disallow: /ajh/
Disallow: /~appelt/
Disallow: /~bletter/
Disallow: /~eriksen/
Disallow: /~gerdin/
Disallow: /~hobbs/
Disallow: /~israel/
Disallow: /~leei/
Disallow: /~luong/
Disallow: /~myers/
Disallow: /~ortiz/
Disallow: /~wilkins/
Disallow: /~bolles/
Disallow: /~boyce/
Disallow: /~burns/
Disallow: /~chaudhri/
Disallow: /~sakata/
Disallow: /~connolly/
Disallow: /~culy/
Disallow: /~fischler/
Disallow: /~garvey/
Disallow: /~harrison/
Disallow: /~heller/
Disallow: /~hsu/
Disallow: /~jarvis/
Disallow: /~jenkins/
Disallow: /~johnston/
Disallow: /~pkarp/
Disallow: /~karp/
Disallow: /~konolige/
Disallow: /~leclerc/
Disallow: /~tomlee/
Disallow: /~lee/
Disallow: /~lenz/
Disallow: /~lowrance/
Disallow: /~martin/
Disallow: /~morley/
Disallow: /~narayana/
Disallow: /~paley/
Disallow: /~perrault/
Disallow: /~pick/
Disallow: /~acr/
Disallow: /~promero/
Disallow: /~reddy/
Disallow: /~ruspini/
Disallow: /~sheleg/
Disallow: /~thomere/
Disallow: /~valdez/
Disallow: /~vincent/
Disallow: /~waldinger/
Disallow: /~mjw/
# If you uncomment User-agent below, then you need to add a blank line here.
# Now we do the robot restrictions for all other web crawlers
# NOTE: You can not have multiple "User-agent: *" lines in this file!!
# While the User-agent line below stays commented out, the Disallow lines that
# follow it are simply a continuation of the "User-agent: *" record above.
#User-agent: *
Disallow: /mailing-lists/ # Don't let it search the mail archives
Disallow: /~listar/ # Don't let it search the mail archives
Disallow: /cgi-bin/
Disallow: /Harvest/
#Disallow: /pubs/
Disallow: /~magic/TileSets/
Disallow: /~magic/tmp/
Disallow: /TileSets/
Disallow: /geovrml/archive/
Disallow: /aic-private/
Disallow: /~oaa/sri-private/
Disallow: /oaa/sri-private/
Disallow: /~daml/sri-private/
Disallow: /daml/sri-private/
Disallow: /~rkf/private/
Disallow: /~communic/private/
Disallow: /perception/projects/radius/testbed/user-manual/
Disallow: /php/
Disallow: /norobot/ # This is to test to see if robots/spiders are ignoring this robots.txt
Disallow: /norobot.html # Another test.
# WARNING: If you add any here, add them in all more specific blocks of user agents.