/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.crawler;

import java.io.File;
import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.encoding.UTF8;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.ByteOrder;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.cora.util.SpaceExceededException;
import net.yacy.crawler.RecrawlBusyThread;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.CrawlQueues;
import net.yacy.crawler.data.NoticedURL;
import net.yacy.crawler.retrieval.Request;
import net.yacy.document.parser.html.TagValency;
import net.yacy.kelondro.blob.MapHeap;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.index.RowHandleSet;
import net.yacy.kelondro.util.FileUtils;
import net.yacy.kelondro.util.kelondroException;
import net.yacy.search.Switchboard;

/**
 * Registry of crawl profiles, split into an "active" and a "passive" store.
 *
 * <p>Both stores are {@link MapHeap} files located in the switchboard's queues root directory
 * ({@link #DBFILE_ACTIVE_CRAWL_PROFILES} and {@link #DBFILE_PASSIVE_CRAWL_PROFILES}). Active
 * profiles are additionally held in an in-memory cache keyed by the profile handle. A set of
 * built-in default profiles (see {@link #DEFAULT_PROFILES}) is (re)written into the active store
 * every time the active store is initialised.
 */
public final class CrawlSwitchboard {
    public static final String CRAWL_PROFILE_AUTOCRAWL_DEEP = "autocrawlDeep";
    public static final String CRAWL_PROFILE_AUTOCRAWL_SHALLOW = "autocrawlShallow";
    public static final String CRAWL_PROFILE_RECRAWL_JOB = "recrawlJob";
    public static final String CRAWL_PROFILE_PROXY = "proxy";
    public static final String CRAWL_PROFILE_REMOTE = "remote";
    public static final String CRAWL_PROFILE_SNIPPET_LOCAL_TEXT = "snippetLocalText";
    public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT = "snippetGlobalText";
    public static final String CRAWL_PROFILE_GREEDY_LEARNING_TEXT = "snippetGreedyLearningText";
    public static final String CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA = "snippetLocalMedia";
    public static final String CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA = "snippetGlobalMedia";
    public static final String CRAWL_PROFILE_PACKS = "packs";

    /** Names of the built-in profiles; these are never moved to the passive store or reported as finished. */
    public static Set<String> DEFAULT_PROFILES = new HashSet<String>();

    public static final String DBFILE_ACTIVE_CRAWL_PROFILES = "crawlProfilesActive1.heap";
    public static final String DBFILE_PASSIVE_CRAWL_PROFILES = "crawlProfilesPassive1.heap";

    // Recrawl cycles handed to CrawlProfile.getRecrawlDate(); presumably minutes
    // (1440 = one day, 43200 = thirty days) -- confirm against CrawlProfile.
    public static final long CRAWL_PROFILE_RECRAWL_JOB_RECRAWL_CYCLE = 60L;
    public static final long CRAWL_PROFILE_PROXY_RECRAWL_CYCLE = 1440L;
    public static final long CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE = 43200L;
    public static final long CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE = 43200L;
    public static final long CRAWL_PROFILE_GREEDY_LEARNING_TEXT_RECRAWL_CYCLE = 43200L;
    public static final long CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE = 43200L;
    public static final long CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE = 43200L;
    public static final long CRAWL_PROFILE_PACK_RECRAWL_CYCLE = 43200L;

    /** Upper bound, in milliseconds, for the queue scan in {@link #getFinishedProfiles(CrawlQueues)}. */
    private static final long FINISHED_SCAN_TIMEOUT_MS = 60000L;
    /** Maximum number of URL hashes sampled per profile handle during the queue scan. */
    private static final int MAX_SAMPLED_URLS_PER_PROFILE = 100;

    private final ConcurrentLog log;
    private MapHeap profilesActiveCrawls;
    private final MapHeap profilesPassiveCrawls;
    /** In-memory cache in front of {@link #profilesActiveCrawls}. */
    private final Map<byte[], CrawlProfile> profilesActiveCrawlsCache;
    /** Per profile handle: sample of URL hashes found in the crawl queues; rebuilt by getFinishedProfiles. */
    private final Map<String, RowHandleSet> profilesActiveCrawlsCounter;
    public CrawlProfile defaultProxyProfile;
    public CrawlProfile defaultRemoteProfile;
    public CrawlProfile defaultTextSnippetLocalProfile;
    public CrawlProfile defaultTextSnippetGlobalProfile;
    public CrawlProfile defaultTextGreedyLearningProfile;
    public CrawlProfile defaultMediaSnippetLocalProfile;
    public CrawlProfile defaultMediaSnippetGlobalProfile;
    public CrawlProfile defaultPackProfile;
    public CrawlProfile defaultAutocrawlDeepProfile;
    public CrawlProfile defaultAutocrawlShallowProfile;
    public CrawlProfile defaultRecrawlJobProfile;
    /** Lazily created "push_&lt;collection&gt;" profiles, keyed by collection name. */
    private final Map<String, CrawlProfile> defaultPushProfiles;
    private final File queuesRoot;
    private final Switchboard switchboard;

    /**
     * Opens the active and passive profile stores below {@code switchboard.queuesRoot} (creating the
     * directory if necessary), checks that every stored entry can be turned into a
     * {@link CrawlProfile}, and writes the default profiles into the active store.
     *
     * @param switchboard the owning switchboard; supplies the logger and the queues root directory
     */
    public CrawlSwitchboard(Switchboard switchboard) {
        this.switchboard = switchboard;
        this.log = this.switchboard.log;
        this.queuesRoot = this.switchboard.queuesRoot;
        this.defaultPushProfiles = new ConcurrentHashMap<String, CrawlProfile>();
        this.profilesActiveCrawlsCache = Collections.synchronizedMap(new TreeMap<byte[], CrawlProfile>(Base64Order.enhancedCoder));
        this.profilesActiveCrawlsCounter = new ConcurrentHashMap<String, RowHandleSet>();
        this.queuesRoot.mkdirs();
        this.log.config("Initializing Crawl Profiles");

        // NOTE(review): loadFromDB may return null if the heap cannot be created even after a
        // reset; the code below does not guard against that.
        File profilesActiveFile = new File(this.queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
        this.profilesActiveCrawls = CrawlSwitchboard.loadFromDB(profilesActiveFile);
        for (byte[] handle : this.profilesActiveCrawls.keySet()) {
            // Only verifies that the stored entry is readable; the parsed profile is not kept.
            try {
                new CrawlProfile(this.profilesActiveCrawls.get(handle));
            }
            catch (IOException | RuntimeException | SpaceExceededException e) {
                // Render the handle as text; String.valueOf(byte[]) would only print the array identity.
                ConcurrentLog.warn("CrawlProfiles", "Could not load profile " + ASCII.String(handle), e);
            }
        }
        this.initActiveCrawlProfiles();
        this.log.info("Loaded active crawl profiles from file " + profilesActiveFile.getName() + ", " + this.profilesActiveCrawls.size() + " entries");

        File profilesPassiveFile = new File(this.queuesRoot, DBFILE_PASSIVE_CRAWL_PROFILES);
        this.profilesPassiveCrawls = CrawlSwitchboard.loadFromDB(profilesPassiveFile);
        for (byte[] handle : this.profilesPassiveCrawls.keySet()) {
            try {
                CrawlProfile p = new CrawlProfile(this.profilesPassiveCrawls.get(handle));
                ConcurrentLog.info("CrawlProfiles", "loaded Profile " + p.handle() + ": " + p.collectionName());
            }
            catch (IOException | RuntimeException | SpaceExceededException e) {
                // Report unreadable entries instead of dropping the failure silently;
                // handled the same way as for the active store above.
                ConcurrentLog.warn("CrawlProfiles", "Could not load passive profile " + ASCII.String(handle), e);
            }
        }
        this.log.info("Loaded passive crawl profiles from file " + profilesPassiveFile.getName() + ", " + this.profilesPassiveCrawls.size() + " entries, " + profilesPassiveFile.length() / 1024L);
    }

    /**
     * Looks a profile up in the active store first and in the passive store second. A profile
     * found only in the passive store is moved to the active store before it is returned.
     *
     * @param profileKey the profile handle; may be {@code null}
     * @return the profile, or {@code null} if neither store has it
     */
    public CrawlProfile get(byte[] profileKey) {
        CrawlProfile active = this.getActive(profileKey);
        if (active != null) {
            return active;
        }
        CrawlProfile passive = this.getPassive(profileKey);
        if (passive == null) {
            return null;
        }
        // putActive() also removes the entry from the passive store.
        this.putActive(profileKey, passive);
        return passive;
    }

    /**
     * Returns an active profile, consulting the in-memory cache before the heap file. A profile
     * read from the heap is added to the cache.
     *
     * @param profileKey the profile handle; may be {@code null}
     * @return the profile, or {@code null} if the key is {@code null}, unknown, or reading failed
     */
    public CrawlProfile getActive(byte[] profileKey) {
        if (profileKey == null) {
            return null;
        }
        CrawlProfile cached = this.profilesActiveCrawlsCache.get(profileKey);
        if (cached != null) {
            return cached;
        }
        Map<String, String> stored;
        try {
            stored = this.profilesActiveCrawls.get(profileKey);
        }
        catch (IOException | SpaceExceededException e) {
            return null;
        }
        if (stored == null) {
            return null;
        }
        CrawlProfile loaded = new CrawlProfile(stored);
        this.profilesActiveCrawlsCache.put(profileKey, loaded);
        return loaded;
    }

    /**
     * Returns a passive profile read from the passive heap file.
     *
     * @param profileKey the profile handle; may be {@code null}
     * @return a new profile object, or {@code null} if the key is {@code null}, unknown, or reading failed
     */
    public CrawlProfile getPassive(byte[] profileKey) {
        if (profileKey == null) {
            return null;
        }
        Map<String, String> stored;
        try {
            stored = this.profilesPassiveCrawls.get(profileKey);
        }
        catch (IOException | SpaceExceededException e) {
            return null;
        }
        return stored == null ? null : new CrawlProfile(stored);
    }

    /** @return the handles of all profiles in the active store (including the default profiles) */
    public Set<byte[]> getActive() {
        return this.profilesActiveCrawls.keySet();
    }

    /** @return the handles of all profiles in the passive store */
    public Set<byte[]> getPassive() {
        return this.profilesPassiveCrawls.keySet();
    }

    /**
     * Removes a profile from the active cache and the active store.
     *
     * @param profileKey the profile handle; {@code null} is ignored
     */
    public void removeActive(byte[] profileKey) {
        if (profileKey == null) {
            return;
        }
        this.profilesActiveCrawlsCache.remove(profileKey);
        this.profilesActiveCrawls.remove(profileKey);
    }

    /**
     * Removes a profile from the passive store.
     *
     * @param profileKey the profile handle; {@code null} is ignored
     */
    public void removePassive(byte[] profileKey) {
        if (profileKey == null) {
            return;
        }
        this.profilesPassiveCrawls.remove(profileKey);
    }

    /**
     * Stores a profile in the active store and cache and removes it from the passive store.
     *
     * @param profileKey the profile handle
     * @param profile2 the profile to store
     */
    public void putActive(byte[] profileKey, CrawlProfile profile2) {
        this.profilesActiveCrawls.put(profileKey, profile2);
        this.profilesActiveCrawlsCache.put(profileKey, profile2);
        this.removePassive(profileKey);
    }

    /**
     * Stores a profile in the passive store and removes it from the active store and cache.
     *
     * @param profileKey the profile handle
     * @param profile2 the profile to store
     */
    public void putPassive(byte[] profileKey, CrawlProfile profile2) {
        this.profilesPassiveCrawls.put(profileKey, profile2);
        this.removeActive(profileKey);
    }

    /**
     * Returns the URL-hash sample collected for a profile by the most recent call to
     * {@link #getFinishedProfiles(CrawlQueues)}.
     *
     * @param profileKey the profile handle
     * @return the sampled URL hashes, or {@code null} if none were recorded for this handle
     */
    public RowHandleSet getURLHashes(byte[] profileKey) {
        return this.profilesActiveCrawlsCounter.get(ASCII.String(profileKey));
    }

    /** Writes {@code profile} into the active store under its own handle. */
    private void storeActive(CrawlProfile profile) {
        this.profilesActiveCrawls.put(UTF8.getBytes(profile.handle()), profile);
    }

    /**
     * Builds all default profiles, assigns them to the public {@code default*Profile} fields and
     * writes each of them into the active store. Depths and indexing flags of the autocrawl and
     * proxy profiles are read from the switchboard configuration.
     */
    private void initActiveCrawlProfiles() {
        Switchboard sb = Switchboard.getSwitchboard();

        this.defaultAutocrawlDeepProfile = new CrawlProfile(CRAWL_PROFILE_AUTOCRAWL_DEEP, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, Integer.parseInt(sb.getConfig("autocrawl.deep.depth", "3")), true, CrawlProfile.getRecrawlDate(Integer.parseInt(sb.getConfig("autocrawl.days", "1")) * 1440), -1, true, true, true, false, sb.getConfigBool("autocrawl.index.text", true), sb.getConfigBool("autocrawl.index.media", true), false, false, -1, false, true, "", CacheStrategy.NOCACHE, "robot_autocrawlDeep", "YaCy Internet (cautious)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultAutocrawlDeepProfile);

        this.defaultAutocrawlShallowProfile = new CrawlProfile(CRAWL_PROFILE_AUTOCRAWL_SHALLOW, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, Integer.parseInt(sb.getConfig("autocrawl.shallow.depth", "1")), true, CrawlProfile.getRecrawlDate(Integer.parseInt(sb.getConfig("autocrawl.days", "1")) * 1440), -1, true, true, true, false, sb.getConfigBool("autocrawl.index.text", true), sb.getConfigBool("autocrawl.index.media", true), false, false, -1, false, true, "", CacheStrategy.NOCACHE, "robot_autocrawlShallow", "YaCy Internet (cautious)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultAutocrawlShallowProfile);

        this.defaultProxyProfile = new CrawlProfile(CRAWL_PROFILE_PROXY, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, Integer.parseInt(sb.getConfig("proxyPrefetchDepth", "0")), true, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PROXY_RECRAWL_CYCLE), -1, false, true, true, false, sb.getConfigBool("proxyIndexingLocalText", true), sb.getConfigBool("proxyIndexingLocalMedia", true), true, sb.getConfigBool("proxyIndexingRemote", false), -1, false, true, "", CacheStrategy.IFFRESH, "robot_proxy", "YaCyProxy", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultProxyProfile);

        this.defaultRemoteProfile = new CrawlProfile(CRAWL_PROFILE_REMOTE, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, null, -1, true, true, true, false, true, true, false, false, -1, false, true, "", CacheStrategy.IFFRESH, "robot_remote", "YaCy Internet (cautious)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultRemoteProfile);

        this.defaultTextSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, false, false, false, true, false, -1, false, true, "", CacheStrategy.IFEXIST, "robot_snippetLocalText", "YaCy Intranet (greedy)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultTextSnippetLocalProfile);

        this.defaultTextSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT_RECRAWL_CYCLE), -1, true, true, true, false, true, true, true, false, -1, false, true, "", CacheStrategy.IFEXIST, "robot_snippetGlobalText", "YaCy Intranet (greedy)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultTextSnippetGlobalProfile);
        this.defaultTextSnippetGlobalProfile.setCacheStrategy(CacheStrategy.IFEXIST);

        this.defaultRecrawlJobProfile = RecrawlBusyThread.buildDefaultCrawlProfile();
        this.storeActive(this.defaultRecrawlJobProfile);

        this.defaultTextGreedyLearningProfile = new CrawlProfile(CRAWL_PROFILE_GREEDY_LEARNING_TEXT, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_GREEDY_LEARNING_TEXT_RECRAWL_CYCLE), -1, true, true, true, false, false, false, true, false, -1, false, true, "", CacheStrategy.IFEXIST, "robot_snippetGreedyLearningText", "Random Browser", TagValency.EVAL, null, null, 0);
        // Store the greedy-learning profile itself (the global text snippet profile was
        // previously stored a second time here, leaving this profile out of the active store).
        this.storeActive(this.defaultTextGreedyLearningProfile);

        this.defaultMediaSnippetLocalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA_RECRAWL_CYCLE), -1, true, true, true, false, false, false, true, false, -1, false, true, "", CacheStrategy.IFEXIST, "robot_snippetLocalMedia", "YaCy Intranet (greedy)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultMediaSnippetLocalProfile);

        this.defaultMediaSnippetGlobalProfile = new CrawlProfile(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA_RECRAWL_CYCLE), -1, true, true, true, false, false, true, true, false, -1, false, true, "", CacheStrategy.IFEXIST, "robot_snippetGlobalMedia", "YaCy Intranet (greedy)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultMediaSnippetGlobalProfile);

        this.defaultPackProfile = new CrawlProfile(CRAWL_PROFILE_PACKS, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, CrawlProfile.getRecrawlDate(CRAWL_PROFILE_PACK_RECRAWL_CYCLE), -1, true, true, false, false, true, false, false, false, -1, false, true, "", CacheStrategy.NOCACHE, "robot_packs", "YaCy Intranet (greedy)", TagValency.EVAL, null, null, 0);
        this.storeActive(this.defaultPackProfile);
    }

    /**
     * Returns the push profile for a collection, creating it (named {@code "push_" + collection})
     * and writing it to the active store on first use.
     *
     * @param collection the collection name
     * @return the existing or newly created push profile
     */
    public CrawlProfile getPushCrawlProfile(String collection) {
        CrawlProfile genericPushProfile = this.defaultPushProfiles.get(collection);
        if (genericPushProfile != null) {
            return genericPushProfile;
        }
        genericPushProfile = new CrawlProfile("push_" + collection, ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, null, -1, true, true, false, false, true, true, false, false, -1, false, true, "", CacheStrategy.NOCACHE, collection, "YaCy Intranet (greedy)", TagValency.EVAL, null, null, 0);
        this.storeActive(genericPushProfile);
        this.defaultPushProfiles.put(collection, genericPushProfile);
        return genericPushProfile;
    }

    /**
     * Discards the active store: clears the cache, deletes the heap file, opens a fresh heap and
     * writes the default profiles into it.
     */
    private void resetProfiles() {
        this.profilesActiveCrawlsCache.clear();
        File pdb = new File(this.queuesRoot, DBFILE_ACTIVE_CRAWL_PROFILES);
        if (pdb.exists()) {
            FileUtils.deletedelete(pdb);
        }
        try {
            this.profilesActiveCrawls = new MapHeap(pdb, 12, NaturalOrder.naturalOrder, 65536, 500, ' ');
        }
        catch (IOException e1) {
            ConcurrentLog.logException(e1);
            // NOTE(review): with the heap set to null the initialisation below will fail with a
            // NullPointerException; no recovery path is visible from this class.
            this.profilesActiveCrawls = null;
        }
        this.initActiveCrawlProfiles();
    }

    /**
     * Moves every non-default profile from the active store to the passive store. Entries that
     * cannot be read are skipped. If the store raises a {@code kelondroException}, the active store
     * is reset instead.
     *
     * @return {@code true} if at least one profile was moved or the store was reset
     * @throws InterruptedException if the current thread is interrupted while iterating
     */
    public boolean clear() throws InterruptedException {
        this.profilesActiveCrawlsCache.clear();
        boolean hasDoneSomething = false;
        try {
            for (byte[] handle : this.profilesActiveCrawls.keySet()) {
                if (Thread.currentThread().isInterrupted()) {
                    throw new InterruptedException("Shutdown in progress");
                }
                CrawlProfile stored;
                try {
                    stored = new CrawlProfile(this.profilesActiveCrawls.get(handle));
                }
                catch (IOException | SpaceExceededException e) {
                    continue;
                }
                if (DEFAULT_PROFILES.contains(stored.name())) {
                    continue;
                }
                // The passive store receives a copy, keyed by the copy's own handle.
                CrawlProfile copy = new CrawlProfile(stored);
                this.profilesPassiveCrawls.put(UTF8.getBytes(copy.handle()), copy);
                this.profilesActiveCrawls.remove(handle);
                hasDoneSomething = true;
            }
        }
        catch (kelondroException e) {
            this.resetProfiles();
            hasDoneSomething = true;
        }
        return hasDoneSomething;
    }

    /**
     * Collects the handles of all active profiles whose name is not one of
     * {@link #DEFAULT_PROFILES}.
     *
     * @return a new, mutable set of handles as strings
     */
    public Set<String> getActiveProfiles() {
        Set<String> profileKeys = new HashSet<String>();
        for (byte[] handle : this.getActive()) {
            CrawlProfile entry = new CrawlProfile(this.getActive(handle));
            if (!DEFAULT_PROFILES.contains(entry.name())) {
                profileKeys.add(ASCII.String(handle));
            }
        }
        return profileKeys;
    }

    /**
     * Determines which non-default active profiles are no longer referenced by any queued request
     * or active worker.
     *
     * <p>As a side effect the per-profile URL-hash samples (see {@link #getURLHashes(byte[])}) are
     * cleared and rebuilt from the scanned queue entries, with at most
     * {@value #MAX_SAMPLED_URLS_PER_PROFILE} hashes per profile.
     *
     * @param crawlQueues the queues whose notice-URL stacks are scanned
     * @return the handles of finished profiles; an empty set if there are none, if the scan runs
     *         longer than {@value #FINISHED_SCAN_TIMEOUT_MS} ms, or if any error occurs
     */
    public Set<String> getFinishedProfiles(CrawlQueues crawlQueues) {
        this.profilesActiveCrawlsCounter.clear();
        Set<String> deletionCandidate = this.getActiveProfiles();
        if (deletionCandidate.isEmpty()) {
            return new HashSet<String>(0);
        }
        long timeout = System.currentTimeMillis() + FINISHED_SCAN_TIMEOUT_MS;
        try {
            for (NoticedURL.StackType stack : NoticedURL.StackType.values()) {
                Iterator<Request> queued = crawlQueues.noticeURL.iterator(stack);
                if (queued == null) {
                    continue;
                }
                while (queued.hasNext()) {
                    Request r = queued.next();
                    if (r == null) {
                        continue;
                    }
                    String handle = r.profileHandle();
                    RowHandleSet sample = this.profilesActiveCrawlsCounter.get(handle);
                    if (sample == null) {
                        sample = new RowHandleSet(12, (ByteOrder)Word.commonHashOrder, 0);
                        this.profilesActiveCrawlsCounter.put(handle, sample);
                    }
                    if (sample.size() < MAX_SAMPLED_URLS_PER_PROFILE) {
                        sample.put(r.url().hash());
                    }
                    // A profile with a queued request is still in use.
                    deletionCandidate.remove(handle);
                    if (deletionCandidate.isEmpty()) {
                        return new HashSet<String>(0);
                    }
                    if (System.currentTimeMillis() > timeout) {
                        // Give up rather than report candidates from an incomplete scan.
                        return new HashSet<String>(0);
                    }
                }
                if (deletionCandidate.isEmpty()) {
                    return new HashSet<String>(0);
                }
            }
            // Profiles of requests currently being processed are still in use as well.
            Map<DigestURL, Request> workers = this.switchboard.crawlQueues.activeWorkerEntries();
            for (Request request : workers.values()) {
                deletionCandidate.remove(request.profileHandle());
            }
        }
        catch (Throwable e) {
            ConcurrentLog.logException(e);
            return new HashSet<String>(0);
        }
        return deletionCandidate;
    }

    /**
     * Tells whether no crawl work is pending.
     *
     * @param crawlQueues the queues whose notice-URL stacks are checked
     * @return {@code true} if the notice-URL stacks are empty and the switchboard reports no
     *         active worker entries
     */
    public boolean allCrawlsFinished(CrawlQueues crawlQueues) {
        if (!crawlQueues.noticeURL.isEmpty()) {
            return false;
        }
        return this.switchboard.crawlQueues.activeWorkerEntries().isEmpty();
    }

    /**
     * Moves the given active profiles to the passive store. Handles that are not active are
     * ignored.
     *
     * @param deletionCandidate profile handles as strings
     */
    public void cleanProfiles(Set<String> deletionCandidate) {
        for (String h : deletionCandidate) {
            byte[] handle = ASCII.getBytes(h);
            CrawlProfile p = this.getActive(handle);
            if (p == null) {
                continue;
            }
            // putPassive() also removes the entry from the active store and cache.
            this.putPassive(handle, p);
        }
    }

    /** Clears the active cache and closes both profile stores. */
    public synchronized void close() {
        this.profilesActiveCrawlsCache.clear();
        this.profilesActiveCrawls.close();
        this.profilesPassiveCrawls.close();
    }

    /**
     * Opens a profile heap file. If opening fails, the file is deleted and opening is retried once.
     *
     * @param file the heap file
     * @return the opened heap, or {@code null} if the retry failed too
     */
    private static MapHeap loadFromDB(File file) {
        try {
            return new MapHeap(file, 12, NaturalOrder.naturalOrder, 65536, 500, ' ');
        }
        catch (IOException e) {
            ConcurrentLog.logException(e);
            FileUtils.deletedelete(file);
        }
        try {
            return new MapHeap(file, 12, NaturalOrder.naturalOrder, 65536, 500, ' ');
        }
        catch (IOException e1) {
            ConcurrentLog.logException(e1);
            return null;
        }
    }

    static {
        DEFAULT_PROFILES.add(CRAWL_PROFILE_AUTOCRAWL_DEEP);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_AUTOCRAWL_SHALLOW);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_RECRAWL_JOB);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_PROXY);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_REMOTE);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_SNIPPET_LOCAL_TEXT);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_SNIPPET_GLOBAL_TEXT);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_GREEDY_LEARNING_TEXT);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_SNIPPET_LOCAL_MEDIA);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_SNIPPET_GLOBAL_MEDIA);
        DEFAULT_PROFILES.add(CRAWL_PROFILE_PACKS);
    }
}

