/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.crawler.robots;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.regex.Pattern;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.crawler.robots.RobotsTxt;

/**
 * Line-oriented parser for the content of a robots.txt file.
 *
 * <p>The parser recognises the {@code User-agent}, {@code Allow}, {@code Disallow},
 * {@code Crawl-delay} and {@code Sitemap} directives (matched case-insensitively).
 * Rules are collected separately for the wildcard agent ({@code *}) and for the agent
 * names handed to the constructor; if at least one group names one of those agents,
 * only that group's rules and crawl delay are reported, otherwise the wildcard ones.
 * Sitemap entries are collected regardless of the group they appear in.
 */
public final class RobotsTxtParser {
    private static final Pattern patternTab = Pattern.compile("\t");

    // Directive names; they are matched case-insensitively and locale-independently.
    private static final String ROBOTS_USER_AGENT = "User-agent:";
    private static final String ROBOTS_DISALLOW = "Disallow:";
    private static final String ROBOTS_ALLOW = "Allow:";
    private static final String ROBOTS_COMMENT = "#";
    private static final String ROBOTS_SITEMAP = "Sitemap:";
    private static final String ROBOTS_CRAWL_DELAY = "Crawl-delay:";

    /** Upper bound applied to any crawl delay read from the file. */
    private static final long MAX_CRAWL_DELAY_MILLIS = 10000L;

    private final ArrayList<String> allowList = new ArrayList<String>(0);
    private final ArrayList<String> denyList = new ArrayList<String>(0);
    private final ArrayList<String> sitemaps = new ArrayList<String>(0);
    private long crawlDelayMillis = 0L;
    private final String[] myNames;
    private String agentName;

    /**
     * Creates a parser without any content; all result lists stay empty.
     *
     * @param myNames the user-agent names this crawler answers to
     */
    protected RobotsTxtParser(String[] myNames) {
        this.myNames = myNames;
        this.agentName = null;
    }

    /**
     * Creates a parser and immediately parses the given robots.txt content.
     *
     * @param myNames the user-agent names this crawler answers to
     * @param robotsTxt raw robots.txt bytes; {@code null} or empty content is not parsed
     */
    protected RobotsTxtParser(String[] myNames, byte[] robotsTxt) {
        this(myNames);
        if (robotsTxt != null && robotsTxt.length != 0) {
            ByteArrayInputStream bin = new ByteArrayInputStream(robotsTxt);
            // Decode explicitly as UTF-8 instead of relying on the platform default charset.
            BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));
            this.parse(reader);
        }
    }

    /**
     * Reads the robots.txt content line by line and fills the allow list, deny list,
     * sitemap list, crawl delay and matched agent name of this instance.
     *
     * @param reader source of the robots.txt text
     */
    private void parse(BufferedReader reader) {
        ArrayList<String> deny4AllAgents = new ArrayList<String>();
        ArrayList<String> deny4ThisAgents = new ArrayList<String>();
        ArrayList<String> allow4AllAgents = new ArrayList<String>();
        ArrayList<String> allow4ThisAgents = new ArrayList<String>();
        // Crawl delays are tracked per category so that an unrelated or wildcard group
        // can neither erase nor override the delay of a group addressed to this agent.
        long delay4AllAgents = 0L;
        long delay4ThisAgents = 0L;
        boolean isRule4AllAgents = false;
        boolean isRule4ThisAgents = false;
        boolean rule4ThisAgentsFound = false;
        boolean inBlock = false;
        boolean firstLine = true;
        try {
            String rawLine;
            while ((rawLine = reader.readLine()) != null) {
                if (firstLine) {
                    firstLine = false;
                    // A byte order mark (U+FEFF) would otherwise hide the first directive.
                    if (rawLine.startsWith("\uFEFF")) {
                        rawLine = rawLine.substring(1);
                    }
                }
                String line = patternTab.matcher(rawLine).replaceAll(" ").trim();
                if (line.isEmpty() || line.startsWith(ROBOTS_COMMENT)) {
                    continue;
                }

                if (isDirective(line, ROBOTS_SITEMAP)) {
                    // No comment stripping here: the value is taken verbatim.
                    String sitemapUrl = directiveValue(line, ROBOTS_SITEMAP, false);
                    if (!sitemapUrl.isEmpty()) {
                        this.sitemaps.add(sitemapUrl);
                    }
                    continue;
                }

                if (isDirective(line, ROBOTS_USER_AGENT)) {
                    if (inBlock) {
                        // A User-agent line following rule lines starts a new group.
                        inBlock = false;
                        isRule4AllAgents = false;
                        isRule4ThisAgents = false;
                    }
                    String userAgent = directiveValue(line, ROBOTS_USER_AGENT, true);
                    if (userAgent.isEmpty()) {
                        continue;
                    }
                    if (userAgent.equals("*")) {
                        isRule4AllAgents = true;
                    }
                    for (String agent : this.myNames) {
                        if (userAgent.equalsIgnoreCase(agent)) {
                            this.agentName = agent;
                            isRule4ThisAgents = true;
                            break;
                        }
                    }
                    if (isRule4ThisAgents) {
                        rule4ThisAgentsFound = true;
                    }
                    continue;
                }

                if (isDirective(line, ROBOTS_CRAWL_DELAY)) {
                    inBlock = true;
                    if (!isRule4ThisAgents && !isRule4AllAgents) {
                        continue;
                    }
                    String delayText = directiveValue(line, ROBOTS_CRAWL_DELAY, true);
                    if (delayText.isEmpty()) {
                        continue;
                    }
                    try {
                        long millis = (long) (1000.0 * (double) Float.parseFloat(delayText));
                        // Clamp to [0, MAX_CRAWL_DELAY_MILLIS]; a negative delay is meaningless.
                        millis = Math.max(0L, Math.min(MAX_CRAWL_DELAY_MILLIS, millis));
                        if (isRule4AllAgents) {
                            delay4AllAgents = millis;
                        }
                        if (isRule4ThisAgents) {
                            delay4ThisAgents = millis;
                        }
                    } catch (NumberFormatException ignored) {
                        // Deliberately ignored: a malformed delay leaves the previous value in place.
                    }
                    continue;
                }

                boolean isDisallowRule = isDirective(line, ROBOTS_DISALLOW);
                if (!isDisallowRule && !isDirective(line, ROBOTS_ALLOW)) {
                    // Unknown directive.
                    continue;
                }
                inBlock = true;
                if (!isRule4ThisAgents && !isRule4AllAgents) {
                    continue;
                }
                String path = directiveValue(line, isDisallowRule ? ROBOTS_DISALLOW : ROBOTS_ALLOW, true);
                if (path.isEmpty()) {
                    // An empty rule value carries no path and is not recorded.
                    continue;
                }
                if (path.endsWith("*")) {
                    // Drop one trailing wildcard character before storing the path.
                    path = path.substring(0, path.length() - 1).trim();
                }
                try {
                    path = UTF8.decodeURL(path);
                } catch (Exception ignored) {
                    // Deliberately ignored: if decoding fails the path is kept as written.
                }
                path = RobotsTxt.ROBOTS_DB_PATH_SEPARATOR_MATCHER.matcher(path).replaceAll("%3B");

                ArrayList<String> target4AllAgents = isDisallowRule ? deny4AllAgents : allow4AllAgents;
                ArrayList<String> target4ThisAgents = isDisallowRule ? deny4ThisAgents : allow4ThisAgents;
                if (isRule4AllAgents) {
                    target4AllAgents.add(path);
                }
                if (isRule4ThisAgents) {
                    target4ThisAgents.add(path);
                }
            }
        } catch (IOException ignored) {
            // Best effort: keep whatever was parsed before the read failed.
        }
        // Rules addressed to one of our names take precedence over the wildcard rules.
        this.allowList.addAll(rule4ThisAgentsFound ? allow4ThisAgents : allow4AllAgents);
        this.denyList.addAll(rule4ThisAgentsFound ? deny4ThisAgents : deny4AllAgents);
        this.crawlDelayMillis = rule4ThisAgentsFound ? delay4ThisAgents : delay4AllAgents;
    }

    /**
     * Tells whether {@code line} begins with {@code directive}, ignoring case
     * without depending on the default locale.
     */
    private static boolean isDirective(String line, String directive) {
        return line.regionMatches(true, 0, directive, 0, directive.length());
    }

    /**
     * Returns the trimmed text following {@code directive} in {@code line}.
     * The caller must have checked that the line starts with the directive.
     *
     * @param stripComment whether everything from the first {@code #} on is removed
     */
    private static String directiveValue(String line, String directive, boolean stripComment) {
        String value = line.substring(directive.length());
        if (stripComment) {
            int commentStart = value.indexOf(ROBOTS_COMMENT);
            if (commentStart != -1) {
                value = value.substring(0, commentStart);
            }
        }
        return value.trim();
    }

    /**
     * @return the crawl delay in milliseconds (0 to 10000) of the applicable rule group,
     *         or 0 if none was given
     */
    protected long crawlDelayMillis() {
        return this.crawlDelayMillis;
    }

    /**
     * @return the entry of the configured names that most recently matched a
     *         {@code User-agent} line, or {@code null} if none matched
     */
    protected String agentName() {
        return this.agentName;
    }

    /**
     * @return the values of all {@code Sitemap} directives, in file order
     */
    protected ArrayList<String> sitemap() {
        return this.sitemaps;
    }

    /**
     * @return the paths of the applicable {@code Allow} rules
     */
    protected ArrayList<String> allowList() {
        return this.allowList;
    }

    /**
     * @return the paths of the applicable {@code Disallow} rules
     */
    protected ArrayList<String> denyList() {
        return this.denyList;
    }
}

