/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.crawler.data;

import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import net.yacy.cora.document.id.MultiProtocolURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.order.Digest;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.util.CommonPattern;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.CrawlSwitchboard;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.parser.html.TagValency;
import net.yacy.search.query.QueryParams;
import net.yacy.server.serverObjects;
import org.json.JSONArray;
import org.json.JSONException;
import org.json.JSONTokener;

public class CrawlProfile
extends ConcurrentHashMap<String, String>
implements Map<String, String>,
Cloneable {
    private static final long serialVersionUID = 5527325718810703504L;
    /** Regular expression string that matches any input. */
    public static final String MATCH_ALL_STRING = ".*";
    /** Empty regular expression string; the pattern getters of this class map it to {@link #MATCH_NEVER_PATTERN}. */
    public static final String MATCH_NEVER_STRING = "";
    /** Empty Solr query string. */
    public static final String SOLR_EMPTY_QUERY = "";
    /** Solr query string selecting all documents. */
    public static final String SOLR_MATCH_ALL_QUERY = "*:*";
    /** Compiled form of {@code ".*"}. */
    public static final Pattern MATCH_ALL_PATTERN = Pattern.compile(".*");
    /** Compiled form of the empty expression; a full match succeeds only on empty input. */
    public static final Pattern MATCH_NEVER_PATTERN = Pattern.compile("");
    /** Name prefix tested by {@code isPushCrawlProfile()}. */
    public static final String CRAWL_PROFILE_PUSH_STUB = "push_";
    // Compiled filter patterns. Each one starts as null and is filled in by
    // the first call of its getter, which then keeps returning the cached instance.
    private Pattern crawlerurlmustmatch = null;
    private Pattern crawlerurlmustnotmatch = null;
    private Pattern crawlerOriginUrlMustMatch = null;
    private Pattern crawlerOriginUrlMustNotMatch = null;
    private Pattern crawleripmustmatch = null;
    private Pattern crawleripmustnotmatch = null;
    private Pattern crawlernodepthlimitmatch = null;
    private Pattern indexurlmustmatch = null;
    private Pattern indexurlmustnotmatch = null;
    private Pattern indexcontentmustmatch = null;
    private Pattern indexcontentmustnotmatch = null;
    private Pattern indexMediaTypeMustMatch = null;
    private Pattern indexMediaTypeMustNotMatch = null;
    private Pattern snapshotsMustnotmatch = null;
    // Per-domain counters, keyed by domain name; maintained by domInc() and getCount().
    private final Map<String, AtomicInteger> doms;
    // Values below are set once by the constructors and exposed through same-named accessors.
    private final TagValency defaultValency;
    private final Set<String> valencySwitchTagNames;
    private final VocabularyScraper scraper;
    // Parsed form of the COLLECTIONS attribute, built on the first call of collections().
    private Map<String, Pattern> cmap = null;

    /**
     * Creates a crawl profile from individual settings and stores every setting as a
     * string attribute of this map.
     * <p>
     * The handle is the first 12 characters of the encoded MD5 digest computed over the
     * (possibly truncated) name, the URL must-match filter, the depth, the URL
     * must-not-match filter, the per-domain page limit and the collections string, all as
     * passed in.
     *
     * @param name profile name; values longer than 256 characters are cut to their first 256 characters
     * @param crawlerUrlMustMatch URL filter for crawling, also used for the origin URL must-match attribute; null means {@link #MATCH_ALL_STRING}
     * @param crawlerUrlMustNotMatch URL exclusion filter for crawling, also used for the origin URL must-not-match attribute; null means the empty string
     * @param crawlerIpMustMatch IP filter; null means {@link #MATCH_ALL_STRING}
     * @param crawlerIpMustNotMatch IP exclusion filter; null means the empty string
     * @param crawlerCountryMustMatch comma-separated country list; null means the empty string
     * @param crawlerNoDepthLimitMatch filter stored as CRAWLER_URL_NODEPTHLIMITMATCH; null means the empty string
     * @param indexUrlMustMatch URL filter for indexing; null means the empty string
     * @param indexUrlMustNotMatch URL exclusion filter for indexing; null means the empty string
     * @param indexContentMustMatch content filter for indexing; null means the empty string
     * @param indexContentMustNotMatch content exclusion filter for indexing; null means the empty string
     * @param noindexWhenCanonicalUnequalURL stored as NOINDEX_WHEN_CANONICAL_UNEQUAL_URL
     * @param depth stored as DEPTH
     * @param directDocByURL stored as DIRECT_DOC_BY_URL
     * @param recrawlIfOlder stored as its millisecond time; null is stored as {@code Long.MAX_VALUE}
     * @param domMaxPages stored as DOM_MAX_PAGES
     * @param crawlingQ stored as CRAWLING_Q
     * @param followFrames stored as FOLLOW_FRAMES
     * @param obeyHtmlRobotsNoindex stored as OBEY_HTML_ROBOTS_NOINDEX
     * @param obeyHtmlRobotsNofollow stored as OBEY_HTML_ROBOTS_NOFOLLOW
     * @param indexText stored as INDEX_TEXT
     * @param indexMedia stored as INDEX_MEDIA
     * @param storeHTCache stored as STORE_HTCACHE
     * @param remoteIndexing stored as REMOTE_INDEXING
     * @param snapshotsMaxDepth stored as SNAPSHOTS_MAXDEPTH
     * @param snapshotsLoadImage stored as SNAPSHOTS_LOADIMAGE
     * @param snapshotsReplaceOld stored as SNAPSHOTS_REPLACEOLD
     * @param snapshotsMustnotmatch stored unchanged as SNAPSHOTS_MUSTNOTMATCH
     * @param cacheStrategy stored via its {@code toString()}; must not be null
     * @param collections collection list; trimmed and stripped of matches of {@code CommonPattern.SPACE}; must not be null
     * @param userAgentName stored as AGENT_NAME
     * @param defaultValency stored via its {@code name()}; must not be null
     * @param valencySwitchTagNames tag names stored as a JSON array; null is treated as an empty set
     * @param scraper vocabulary scraper; null is replaced by a new empty {@link VocabularyScraper}
     * @param timezoneOffset stored as TIMEZONEOFFSET
     * @throws NullPointerException if {@code name} is null or empty, or if {@code cacheStrategy},
     *         {@code collections} or {@code defaultValency} is null
     */
    public CrawlProfile(String name, String crawlerUrlMustMatch, String crawlerUrlMustNotMatch, String crawlerIpMustMatch, String crawlerIpMustNotMatch, String crawlerCountryMustMatch, String crawlerNoDepthLimitMatch, String indexUrlMustMatch, String indexUrlMustNotMatch, String indexContentMustMatch, String indexContentMustNotMatch, boolean noindexWhenCanonicalUnequalURL, int depth, boolean directDocByURL, Date recrawlIfOlder, int domMaxPages, boolean crawlingQ, boolean followFrames, boolean obeyHtmlRobotsNoindex, boolean obeyHtmlRobotsNofollow, boolean indexText, boolean indexMedia, boolean storeHTCache, boolean remoteIndexing, int snapshotsMaxDepth, boolean snapshotsLoadImage, boolean snapshotsReplaceOld, String snapshotsMustnotmatch, CacheStrategy cacheStrategy, String collections, String userAgentName, TagValency defaultValency, Set<String> valencySwitchTagNames, VocabularyScraper scraper, int timezoneOffset) {
        super(40);
        if (name == null || name.isEmpty()) {
            throw new NullPointerException("name must not be null or empty");
        }
        if (name.length() > 256) {
            // Truncate to the leading 256 characters; substring(256) would keep the tail instead.
            name = name.substring(0, 256);
        }
        this.doms = new ConcurrentHashMap<String, AtomicInteger>();
        String handle = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(name + crawlerUrlMustMatch + depth + crawlerUrlMustNotMatch + domMaxPages + collections)).substring(0, 12);
        this.put(CrawlAttribute.HANDLE.key, handle);
        this.put(CrawlAttribute.NAME.key, name);
        this.put(CrawlAttribute.AGENT_NAME.key, userAgentName);
        this.put(CrawlAttribute.CRAWLER_ALWAYS_CHECK_MEDIA_TYPE.key, true);
        this.put(CrawlAttribute.CRAWLER_URL_MUSTMATCH.key, crawlerUrlMustMatch == null ? MATCH_ALL_STRING : crawlerUrlMustMatch);
        this.put(CrawlAttribute.CRAWLER_URL_MUSTNOTMATCH.key, crawlerUrlMustNotMatch == null ? MATCH_NEVER_STRING : crawlerUrlMustNotMatch);
        this.put(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key, crawlerUrlMustMatch == null ? MATCH_ALL_STRING : crawlerUrlMustMatch);
        // The origin must-not-match attribute has its own key; writing CRAWLER_URL_MUSTNOTMATCH
        // a second time would leave it unset for setHandle() and the origin filter getter.
        this.put(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTNOTMATCH.key, crawlerUrlMustNotMatch == null ? MATCH_NEVER_STRING : crawlerUrlMustNotMatch);
        this.put(CrawlAttribute.CRAWLER_IP_MUSTMATCH.key, crawlerIpMustMatch == null ? MATCH_ALL_STRING : crawlerIpMustMatch);
        this.put(CrawlAttribute.CRAWLER_IP_MUSTNOTMATCH.key, crawlerIpMustNotMatch == null ? MATCH_NEVER_STRING : crawlerIpMustNotMatch);
        this.put(CrawlAttribute.CRAWLER_COUNTRY_MUSTMATCH.key, crawlerCountryMustMatch == null ? "" : crawlerCountryMustMatch);
        this.put(CrawlAttribute.CRAWLER_URL_NODEPTHLIMITMATCH.key, crawlerNoDepthLimitMatch == null ? "" : crawlerNoDepthLimitMatch);
        this.put(CrawlAttribute.INDEXING_URL_MUSTMATCH.key, indexUrlMustMatch == null ? "" : indexUrlMustMatch);
        this.put(CrawlAttribute.INDEXING_URL_MUSTNOTMATCH.key, indexUrlMustNotMatch == null ? "" : indexUrlMustNotMatch);
        this.put(CrawlAttribute.INDEXING_CONTENT_MUSTMATCH.key, indexContentMustMatch == null ? "" : indexContentMustMatch);
        this.put(CrawlAttribute.INDEXING_CONTENT_MUSTNOTMATCH.key, indexContentMustNotMatch == null ? "" : indexContentMustNotMatch);
        this.put(CrawlAttribute.DEPTH.key, depth);
        this.put(CrawlAttribute.DIRECT_DOC_BY_URL.key, directDocByURL);
        this.put(CrawlAttribute.RECRAWL_IF_OLDER.key, recrawlIfOlder == null ? Long.MAX_VALUE : recrawlIfOlder.getTime());
        this.put(CrawlAttribute.DOM_MAX_PAGES.key, domMaxPages);
        this.put(CrawlAttribute.CRAWLING_Q.key, crawlingQ);
        this.put(CrawlAttribute.FOLLOW_FRAMES.key, followFrames);
        this.put(CrawlAttribute.OBEY_HTML_ROBOTS_NOINDEX.key, obeyHtmlRobotsNoindex);
        this.put(CrawlAttribute.OBEY_HTML_ROBOTS_NOFOLLOW.key, obeyHtmlRobotsNofollow);
        this.put(CrawlAttribute.INDEX_TEXT.key, indexText);
        this.put(CrawlAttribute.INDEX_MEDIA.key, indexMedia);
        this.put(CrawlAttribute.STORE_HTCACHE.key, storeHTCache);
        this.put(CrawlAttribute.REMOTE_INDEXING.key, remoteIndexing);
        this.put(CrawlAttribute.SNAPSHOTS_MAXDEPTH.key, snapshotsMaxDepth);
        this.put(CrawlAttribute.SNAPSHOTS_LOADIMAGE.key, snapshotsLoadImage);
        this.put(CrawlAttribute.SNAPSHOTS_REPLACEOLD.key, snapshotsReplaceOld);
        this.put(CrawlAttribute.SNAPSHOTS_MUSTNOTMATCH.key, snapshotsMustnotmatch);
        this.put(CrawlAttribute.CACHE_STRAGEGY.key, cacheStrategy.toString());
        this.put(CrawlAttribute.COLLECTIONS.key, CommonPattern.SPACE.matcher(collections.trim()).replaceAll(""));
        this.defaultValency = defaultValency;
        this.valencySwitchTagNames = valencySwitchTagNames == null ? new HashSet<String>() : valencySwitchTagNames;
        // Serialize the normalized (never null) set.
        String jsonString = new JSONArray(this.valencySwitchTagNames).toString();
        this.put(CrawlAttribute.DEFAULT_VALENCY.key, defaultValency.name());
        this.put(CrawlAttribute.VALENCY_SWITCH_TAG_NAMES.key, jsonString);
        this.scraper = scraper == null ? new VocabularyScraper() : scraper;
        jsonString = this.scraper.toString();
        assert (jsonString != null && jsonString.length() > 0 && jsonString.charAt(0) == '{') : "jsonString = " + jsonString;
        this.put(CrawlAttribute.SCRAPER.key, jsonString);
        this.put(CrawlAttribute.TIMEZONEOFFSET.key, timezoneOffset);
        // The attributes below have no constructor parameter and start with fixed defaults.
        this.put(CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTMATCH.key, MATCH_ALL_STRING);
        this.put(CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTNOTMATCH.key, MATCH_NEVER_STRING);
        this.put(CrawlAttribute.INDEXING_SOLR_QUERY_MUSTMATCH.key, SOLR_MATCH_ALL_QUERY);
        this.put(CrawlAttribute.INDEXING_SOLR_QUERY_MUSTNOTMATCH.key, SOLR_EMPTY_QUERY);
        this.put(CrawlAttribute.NOINDEX_WHEN_CANONICAL_UNEQUAL_URL.key, noindexWhenCanonicalUnequalURL);
    }

    /**
     * Creates a crawl profile from a map of previously stored attributes.
     * <p>
     * All entries of {@code ext} are copied into this profile. The default valency, the
     * valency switch tag names and the vocabulary scraper are then rebuilt from their
     * string attributes; missing or unparsable JSON values fall back to empty defaults.
     *
     * @param ext the attributes to load; null produces an empty profile with default
     *            valency {@code TagValency.EVAL}, no switch tag names and an empty scraper
     */
    public CrawlProfile(Map<String, String> ext) {
        super(ext == null ? 1 : ext.size());
        JSONArray a;
        if (ext != null) {
            this.putAll(ext);
        }
        // A null ext is tolerated above, so it must not be dereferenced below:
        // read from this (then empty) map instead, which yields the defaults.
        final Map<String, String> source = ext == null ? this : ext;
        this.doms = new ConcurrentHashMap<String, AtomicInteger>();
        String defaultValency = source.get(CrawlAttribute.DEFAULT_VALENCY.key);
        this.defaultValency = defaultValency == null || defaultValency.length() == 0 ? TagValency.EVAL : TagValency.valueOf(defaultValency);
        String jsonString = source.get(CrawlAttribute.VALENCY_SWITCH_TAG_NAMES.key);
        if (jsonString == null) {
            a = new JSONArray();
        } else {
            try {
                a = new JSONArray(new JSONTokener(jsonString));
            }
            catch (JSONException e) {
                ConcurrentLog.logException(e);
                a = new JSONArray();
            }
        }
        this.valencySwitchTagNames = new HashSet<String>();
        for (int i = 0; i < a.length(); ++i) {
            try {
                this.valencySwitchTagNames.add(a.getString(i));
            }
            catch (JSONException e) {
                // Skip the unreadable element but leave a trace, as the other JSON failures here do.
                ConcurrentLog.logException(e);
            }
        }
        jsonString = source.get(CrawlAttribute.SCRAPER.key);
        if (jsonString == null || jsonString.length() == 0) {
            this.scraper = new VocabularyScraper();
        } else {
            VocabularyScraper loadedScraper;
            try {
                loadedScraper = new VocabularyScraper(jsonString);
            }
            catch (JSONException e) {
                ConcurrentLog.logException(e);
                loadedScraper = new VocabularyScraper();
            }
            this.scraper = loadedScraper;
        }
    }

    /**
     * Builds a new profile from a snapshot of this profile's attributes.
     * The copy goes through the map-based constructor, so it starts with its own
     * empty set of per-domain counters.
     *
     * @return a new {@link CrawlProfile} holding the same attributes
     */
    @Override
    public Object clone() {
        final Map<String, String> snapshot = new ConcurrentHashMap<String, String>(this);
        return new CrawlProfile(snapshot);
    }

    /**
     * @return the default tag valency this profile was created with
     */
    public TagValency defaultValency() {
        final TagValency valency = this.defaultValency;
        return valency;
    }

    /**
     * @return the set of valency switch tag names held by this profile (the internal set, not a copy)
     */
    public Set<String> valencySwitchTagNames() {
        final Set<String> tagNames = this.valencySwitchTagNames;
        return tagNames;
    }

    /**
     * @return the vocabulary scraper of this profile; never null after construction
     */
    public VocabularyScraper scraper() {
        final VocabularyScraper vocabularyScraper = this.scraper;
        return vocabularyScraper;
    }

    /**
     * Increments the counter of the given domain, creating the counter on first use.
     * <p>
     * The counter is registered with an atomic {@code putIfAbsent}, so two threads that
     * see a domain for the first time at the same moment update the same counter and no
     * increment is lost.
     *
     * @param domain the domain name; null is ignored
     */
    public void domInc(String domain) {
        if (domain == null) {
            return;
        }
        final AtomicInteger fresh = new AtomicInteger(0);
        final AtomicInteger existing = this.doms.putIfAbsent(domain, fresh);
        final AtomicInteger counter = existing == null ? fresh : existing;
        counter.incrementAndGet();
    }

    /**
     * Returns the name of the domain found at the given position of the counter map's
     * iteration order.
     *
     * @param attr when true the result is the domain followed by {@code "/c="} and its
     *             current count; when false the domain is followed by a single space
     * @param index2 number of entries to skip before reading one
     * @return the formatted domain name, or the empty string when fewer than
     *         {@code index2 + 1} domains are present
     */
    private String domName(boolean attr, int index2) {
        final Iterator<Map.Entry<String, AtomicInteger>> domnamesi = this.doms.entrySet().iterator();
        // Skip the entries in front of the requested position.
        for (int i = 0; domnamesi.hasNext() && i < index2; ++i) {
            domnamesi.next();
        }
        if (!domnamesi.hasNext()) {
            return "";
        }
        final Map.Entry<String, AtomicInteger> ey = domnamesi.next();
        return ey.getKey() + (attr ? "/c=" + ey.getValue().get() : " ");
    }

    /**
     * Looks up the client agent registered under this profile's AGENT_NAME attribute.
     *
     * @return the result of {@code ClientIdentification.getAgent} for the stored agent name
     */
    public ClientIdentification.Agent getAgent() {
        return ClientIdentification.getAgent(this.get(CrawlAttribute.AGENT_NAME.key));
    }

    /**
     * Returns the counter of the given domain, registering a zero counter when the
     * domain is not known yet.
     * <p>
     * Registration uses an atomic {@code putIfAbsent}, so every caller receives the
     * counter that is actually stored for the domain.
     *
     * @param domain the domain name
     * @return the stored counter; for a null domain a detached counter with value 0
     */
    public AtomicInteger getCount(String domain) {
        if (domain == null) {
            return new AtomicInteger(0);
        }
        final AtomicInteger fresh = new AtomicInteger(0);
        final AtomicInteger existing = this.doms.putIfAbsent(domain, fresh);
        return existing == null ? fresh : existing;
    }

    /**
     * Stores a boolean attribute as its string form ({@code "true"} or {@code "false"}).
     * This is an overload of the inherited {@code put}, not an override, so it carries
     * no {@code @Override} annotation.
     *
     * @param key the attribute key
     * @param value the value to store
     */
    public final void put(String key, boolean value) {
        super.put(key, Boolean.toString(value));
    }

    /**
     * Stores an int attribute as its decimal string form.
     * This is an overload of the inherited {@code put}, not an override, so it carries
     * no {@code @Override} annotation.
     *
     * @param key the attribute key
     * @param value the value to store
     */
    private final void put(String key, int value) {
        super.put(key, Integer.toString(value));
    }

    /**
     * Stores a long attribute as its decimal string form.
     * This is an overload of the inherited {@code put}, not an override, so it carries
     * no {@code @Override} annotation.
     *
     * @param key the attribute key
     * @param value the value to store
     */
    private final void put(String key, long value) {
        super.put(key, Long.toString(value));
    }

    /**
     * @return the value stored under the HANDLE attribute; asserted to be present
     */
    public String handle() {
        final String storedHandle = this.get(CrawlAttribute.HANDLE.key);
        assert (storedHandle != null);
        return storedHandle;
    }

    /**
     * Recomputes the HANDLE attribute from the currently stored attributes.
     * <p>
     * The digest input is the concatenation of the name, the origin URL must-match
     * filter, the stored depth string, the origin URL must-not-match filter, the value
     * of {@link #domMaxPages()} and the collections string. An attribute that is not
     * present contributes the text {@code null}.
     */
    public void setHandle() {
        final StringBuilder seed = new StringBuilder();
        seed.append(this.get(CrawlAttribute.NAME.key));
        seed.append(this.get(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key));
        seed.append(this.get(CrawlAttribute.DEPTH.key));
        seed.append(this.get(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTNOTMATCH.key));
        seed.append(this.domMaxPages());
        seed.append(this.get(CrawlAttribute.COLLECTIONS.key));
        final String encoded = Base64Order.enhancedCoder.encode(Digest.encodeMD5Raw(seed.toString()));
        final String handle = encoded.substring(0, 12);
        assert (handle != null && handle.length() == 12) : handle + " != 12";
        this.put(CrawlAttribute.HANDLE.key, handle);
    }

    /**
     * Returns the parsed COLLECTIONS attribute. The attribute is parsed with
     * {@link #collectionParser(String)} on the first call and the result is kept for
     * later calls.
     *
     * @return map from collection name to its pattern
     */
    public Map<String, Pattern> collections() {
        if (this.cmap == null) {
            final String stored = this.get(CrawlAttribute.COLLECTIONS.key);
            this.cmap = CrawlProfile.collectionParser(stored);
        }
        return this.cmap;
    }

    /**
     * Parses a comma-separated collection list. Each entry is either a plain name,
     * which is mapped to {@code QueryParams.catchall_pattern}, or {@code name:regex},
     * which is mapped to the compiled regular expression after the first colon.
     *
     * @param collectionString the list to parse; may be null or empty
     * @return the parsed entries in list order; an empty map for null or empty input
     */
    public static Map<String, Pattern> collectionParser(String collectionString) {
        if (collectionString == null || collectionString.length() == 0) {
            return new HashMap<String, Pattern>();
        }
        final LinkedHashMap<String, Pattern> parsed = new LinkedHashMap<String, Pattern>();
        for (final String entry : CommonPattern.COMMA.split(collectionString)) {
            final int colon = entry.indexOf(':');
            if (colon >= 0) {
                final String collection = entry.substring(0, colon);
                final String regex = entry.substring(colon + 1);
                parsed.put(collection, Pattern.compile(regex));
            } else {
                parsed.put(entry, QueryParams.catchall_pattern);
            }
        }
        return parsed;
    }

    /**
     * Replaces the COLLECTIONS attribute. The list is trimmed and every match of
     * {@code CommonPattern.SPACE} is removed before it is stored.
     * <p>
     * The parsed map cached by {@link #collections()} is discarded so that the next call
     * reflects the new list instead of the previously parsed one.
     *
     * @param collectionsList the new collection list; must not be null
     */
    public void setCollections(String collectionsList) {
        this.put(CrawlAttribute.COLLECTIONS.key, CommonPattern.SPACE.matcher(collectionsList.trim()).replaceAll(""));
        // Invalidate the lazily parsed view of the old value.
        this.cmap = null;
    }

    /**
     * @return the NAME attribute, or the empty string when it is not set
     */
    public String name() {
        final String stored = this.get(CrawlAttribute.NAME.key);
        return stored == null ? "" : stored;
    }

    /**
     * @return the COLLECTIONS attribute, or {@link #name()} when that attribute is
     *         missing, empty or equal to {@code "user"}
     */
    public String collectionName() {
        final String stored = this.get(CrawlAttribute.COLLECTIONS.key);
        if (stored == null || stored.length() == 0) {
            return this.name();
        }
        if ("user".equals(stored)) {
            return this.name();
        }
        return stored;
    }

    /**
     * Returns the compiled CRAWLER_URL_MUSTMATCH filter, compiling it case-insensitively
     * on first use and caching the result.
     *
     * @return {@link #MATCH_ALL_PATTERN} when the attribute is missing or equals
     *         {@link #MATCH_ALL_STRING}; {@link #MATCH_NEVER_PATTERN} when it is not a
     *         valid regular expression
     */
    public Pattern urlMustMatchPattern() {
        if (this.crawlerurlmustmatch != null) {
            return this.crawlerurlmustmatch;
        }
        final String regex = this.get(CrawlAttribute.CRAWLER_URL_MUSTMATCH.key);
        if (regex == null || MATCH_ALL_STRING.equals(regex)) {
            this.crawlerurlmustmatch = MATCH_ALL_PATTERN;
        } else {
            try {
                this.crawlerurlmustmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.crawlerurlmustmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.crawlerurlmustmatch;
    }

    /**
     * Returns the URL must-match pattern as text, shortened for display.
     *
     * @return the pattern string of {@link #urlMustMatchPattern()}; when it is longer
     *         than 1000 characters, its first 1000 characters followed by {@code "..."}
     */
    public String formattedUrlMustMatchPattern() {
        final String patternStr = this.urlMustMatchPattern().toString();
        if (patternStr.length() > 1000) {
            return patternStr.substring(0, 1000) + "...";
        }
        return patternStr;
    }

    /**
     * Returns the compiled CRAWLER_URL_MUSTNOTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_NEVER_PATTERN} when the attribute is missing, empty or not a
     *         valid regular expression
     */
    public Pattern urlMustNotMatchPattern() {
        if (this.crawlerurlmustnotmatch != null) {
            return this.crawlerurlmustnotmatch;
        }
        final String regex = this.get(CrawlAttribute.CRAWLER_URL_MUSTNOTMATCH.key);
        if (regex == null || MATCH_NEVER_STRING.equals(regex)) {
            this.crawlerurlmustnotmatch = MATCH_NEVER_PATTERN;
        } else {
            try {
                this.crawlerurlmustnotmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.crawlerurlmustnotmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.crawlerurlmustnotmatch;
    }

    /**
     * Returns the compiled CRAWLER_ORIGIN_URL_MUSTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_ALL_PATTERN} when the attribute is missing, equals
     *         {@link #MATCH_ALL_STRING} or is not a valid regular expression
     */
    public Pattern getCrawlerOriginUrlMustMatchPattern() {
        if (this.crawlerOriginUrlMustMatch != null) {
            return this.crawlerOriginUrlMustMatch;
        }
        final String regex = this.get(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key);
        if (regex == null || MATCH_ALL_STRING.equals(regex)) {
            this.crawlerOriginUrlMustMatch = MATCH_ALL_PATTERN;
        } else {
            try {
                this.crawlerOriginUrlMustMatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                // An invalid expression falls back to match-all here, not match-never.
                this.crawlerOriginUrlMustMatch = MATCH_ALL_PATTERN;
            }
        }
        return this.crawlerOriginUrlMustMatch;
    }

    /**
     * Returns the compiled CRAWLER_ORIGIN_URL_MUSTNOTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_NEVER_PATTERN} when the attribute is missing, empty or not a
     *         valid regular expression
     */
    public Pattern getCrawlerOriginUrlMustNotMatchPattern() {
        if (this.crawlerOriginUrlMustNotMatch != null) {
            return this.crawlerOriginUrlMustNotMatch;
        }
        final String regex = this.get(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTNOTMATCH.key);
        if (regex == null || MATCH_NEVER_STRING.equals(regex)) {
            this.crawlerOriginUrlMustNotMatch = MATCH_NEVER_PATTERN;
        } else {
            try {
                this.crawlerOriginUrlMustNotMatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.crawlerOriginUrlMustNotMatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.crawlerOriginUrlMustNotMatch;
    }

    /**
     * Returns the compiled CRAWLER_IP_MUSTMATCH filter, compiling it case-insensitively
     * on first use and caching the result.
     *
     * @return {@link #MATCH_ALL_PATTERN} when the attribute is missing or equals
     *         {@link #MATCH_ALL_STRING}; {@link #MATCH_NEVER_PATTERN} when it is not a
     *         valid regular expression
     */
    public Pattern ipMustMatchPattern() {
        if (this.crawleripmustmatch != null) {
            return this.crawleripmustmatch;
        }
        final String regex = this.get(CrawlAttribute.CRAWLER_IP_MUSTMATCH.key);
        if (regex == null || MATCH_ALL_STRING.equals(regex)) {
            this.crawleripmustmatch = MATCH_ALL_PATTERN;
        } else {
            try {
                this.crawleripmustmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.crawleripmustmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.crawleripmustmatch;
    }

    /**
     * Returns the compiled CRAWLER_IP_MUSTNOTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_NEVER_PATTERN} when the attribute is missing, empty or not a
     *         valid regular expression
     */
    public Pattern ipMustNotMatchPattern() {
        if (this.crawleripmustnotmatch != null) {
            return this.crawleripmustnotmatch;
        }
        final String regex = this.get(CrawlAttribute.CRAWLER_IP_MUSTNOTMATCH.key);
        if (regex == null || MATCH_NEVER_STRING.equals(regex)) {
            this.crawleripmustnotmatch = MATCH_NEVER_PATTERN;
        } else {
            try {
                this.crawleripmustnotmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.crawleripmustnotmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.crawleripmustnotmatch;
    }

    /**
     * Returns the CRAWLER_COUNTRY_MUSTMATCH attribute split at commas.
     *
     * @return the comma-separated parts of the attribute, or an empty array when the
     *         attribute is missing or empty
     */
    public String[] countryMustMatchList() {
        final String countryMustMatch = this.get(CrawlAttribute.CRAWLER_COUNTRY_MUSTMATCH.key);
        if (countryMustMatch == null || countryMustMatch.isEmpty()) {
            return new String[0];
        }
        // The former "length == 1 && length == 0" guard could never be true and was dropped.
        return CommonPattern.COMMA.split(countryMustMatch);
    }

    /**
     * Returns the compiled CRAWLER_URL_NODEPTHLIMITMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_NEVER_PATTERN} when the attribute is missing, empty or not a
     *         valid regular expression
     */
    public Pattern crawlerNoDepthLimitMatchPattern() {
        if (this.crawlernodepthlimitmatch != null) {
            return this.crawlernodepthlimitmatch;
        }
        final String regex = this.get(CrawlAttribute.CRAWLER_URL_NODEPTHLIMITMATCH.key);
        if (regex == null || MATCH_NEVER_STRING.equals(regex)) {
            this.crawlernodepthlimitmatch = MATCH_NEVER_PATTERN;
        } else {
            try {
                this.crawlernodepthlimitmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.crawlernodepthlimitmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.crawlernodepthlimitmatch;
    }

    /**
     * Returns the compiled INDEXING_URL_MUSTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_ALL_PATTERN} when the attribute is missing or equals
     *         {@link #MATCH_ALL_STRING}; {@link #MATCH_NEVER_PATTERN} when it is not a
     *         valid regular expression
     */
    public Pattern indexUrlMustMatchPattern() {
        if (this.indexurlmustmatch != null) {
            return this.indexurlmustmatch;
        }
        final String regex = this.get(CrawlAttribute.INDEXING_URL_MUSTMATCH.key);
        if (regex == null || MATCH_ALL_STRING.equals(regex)) {
            this.indexurlmustmatch = MATCH_ALL_PATTERN;
        } else {
            try {
                this.indexurlmustmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.indexurlmustmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.indexurlmustmatch;
    }

    /**
     * Returns the compiled INDEXING_URL_MUSTNOTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_NEVER_PATTERN} when the attribute is missing, empty or not a
     *         valid regular expression
     */
    public Pattern indexUrlMustNotMatchPattern() {
        if (this.indexurlmustnotmatch != null) {
            return this.indexurlmustnotmatch;
        }
        final String regex = this.get(CrawlAttribute.INDEXING_URL_MUSTNOTMATCH.key);
        if (regex == null || MATCH_NEVER_STRING.equals(regex)) {
            this.indexurlmustnotmatch = MATCH_NEVER_PATTERN;
        } else {
            try {
                this.indexurlmustnotmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.indexurlmustnotmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.indexurlmustnotmatch;
    }

    /**
     * Returns the compiled INDEXING_CONTENT_MUSTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_ALL_PATTERN} when the attribute is missing or equals
     *         {@link #MATCH_ALL_STRING}; {@link #MATCH_NEVER_PATTERN} when it is not a
     *         valid regular expression
     */
    public Pattern indexContentMustMatchPattern() {
        if (this.indexcontentmustmatch != null) {
            return this.indexcontentmustmatch;
        }
        final String regex = this.get(CrawlAttribute.INDEXING_CONTENT_MUSTMATCH.key);
        if (regex == null || MATCH_ALL_STRING.equals(regex)) {
            this.indexcontentmustmatch = MATCH_ALL_PATTERN;
        } else {
            try {
                this.indexcontentmustmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.indexcontentmustmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.indexcontentmustmatch;
    }

    /**
     * Returns the compiled INDEXING_CONTENT_MUSTNOTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_NEVER_PATTERN} when the attribute is missing, empty or not a
     *         valid regular expression
     */
    public Pattern indexContentMustNotMatchPattern() {
        if (this.indexcontentmustnotmatch != null) {
            return this.indexcontentmustnotmatch;
        }
        final String regex = this.get(CrawlAttribute.INDEXING_CONTENT_MUSTNOTMATCH.key);
        if (regex == null || MATCH_NEVER_STRING.equals(regex)) {
            this.indexcontentmustnotmatch = MATCH_NEVER_PATTERN;
        } else {
            try {
                this.indexcontentmustnotmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.indexcontentmustnotmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.indexcontentmustnotmatch;
    }

    /**
     * Returns the compiled INDEXING_MEDIA_TYPE_MUSTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_ALL_PATTERN} when the attribute is missing, equals
     *         {@link #MATCH_ALL_STRING} or is not a valid regular expression
     */
    public Pattern getIndexMediaTypeMustMatchPattern() {
        if (this.indexMediaTypeMustMatch != null) {
            return this.indexMediaTypeMustMatch;
        }
        final String regex = this.get(CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTMATCH.key);
        if (regex == null || MATCH_ALL_STRING.equals(regex)) {
            this.indexMediaTypeMustMatch = MATCH_ALL_PATTERN;
        } else {
            try {
                this.indexMediaTypeMustMatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                // An invalid expression falls back to match-all here, not match-never.
                this.indexMediaTypeMustMatch = MATCH_ALL_PATTERN;
            }
        }
        return this.indexMediaTypeMustMatch;
    }

    /**
     * Returns the compiled INDEXING_MEDIA_TYPE_MUSTNOTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_NEVER_PATTERN} when the attribute is missing, empty or not a
     *         valid regular expression
     */
    public Pattern getIndexMediaTypeMustNotMatchPattern() {
        if (this.indexMediaTypeMustNotMatch != null) {
            return this.indexMediaTypeMustNotMatch;
        }
        final String regex = this.get(CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTNOTMATCH.key);
        if (regex == null || MATCH_NEVER_STRING.equals(regex)) {
            this.indexMediaTypeMustNotMatch = MATCH_NEVER_PATTERN;
        } else {
            try {
                this.indexMediaTypeMustNotMatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.indexMediaTypeMustNotMatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.indexMediaTypeMustNotMatch;
    }

    /**
     * @return the DEPTH attribute as an int; 0 when it is missing or not a number
     *         (a parse failure is logged)
     */
    public int depth() {
        final String stored = this.get(CrawlAttribute.DEPTH.key);
        if (stored != null) {
            try {
                return Integer.parseInt(stored);
            }
            catch (NumberFormatException e) {
                ConcurrentLog.logException(e);
            }
        }
        return 0;
    }

    /**
     * @return true only when the DIRECT_DOC_BY_URL attribute is stored as {@code true}
     */
    public boolean isIndexNonParseableUrls() {
        final String flag = this.get(CrawlAttribute.DIRECT_DOC_BY_URL.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true only when the CRAWLER_ALWAYS_CHECK_MEDIA_TYPE attribute is stored as {@code true}
     */
    public boolean isCrawlerAlwaysCheckMediaType() {
        final String flag = this.get(CrawlAttribute.CRAWLER_ALWAYS_CHECK_MEDIA_TYPE.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * Decodes the CACHE_STRAGEGY attribute, which is read as an integer code.
     *
     * @return the decoded strategy; {@code CacheStrategy.IFEXIST} when the attribute is
     *         missing or not a number (a parse failure is logged)
     */
    public CacheStrategy cacheStrategy() {
        final String code = this.get(CrawlAttribute.CACHE_STRAGEGY.key);
        if (code != null) {
            try {
                return CacheStrategy.decode(Integer.parseInt(code));
            }
            catch (NumberFormatException e) {
                ConcurrentLog.logException(e);
            }
        }
        return CacheStrategy.IFEXIST;
    }

    /**
     * Stores the string form of the given strategy under the CACHE_STRAGEGY attribute.
     *
     * @param newStrategy the strategy to store; must not be null
     */
    public void setCacheStrategy(CacheStrategy newStrategy) {
        final String encoded = newStrategy.toString();
        this.put(CrawlAttribute.CACHE_STRAGEGY.key, encoded);
    }

    /**
     * @return the RECRAWL_IF_OLDER attribute as a long, with negative values raised to
     *         0; 0 when it is missing or not a number (a parse failure is logged)
     */
    public long recrawlIfOlder() {
        final String stored = this.get(CrawlAttribute.RECRAWL_IF_OLDER.key);
        if (stored != null) {
            try {
                return Math.max(0L, Long.parseLong(stored));
            }
            catch (NumberFormatException e) {
                ConcurrentLog.logException(e);
            }
        }
        return 0L;
    }

    /**
     * @return the DOM_MAX_PAGES attribute as an int; {@code Integer.MAX_VALUE} when it
     *         is missing, negative or not a number (a parse failure is logged)
     */
    public int domMaxPages() {
        final String stored = this.get(CrawlAttribute.DOM_MAX_PAGES.key);
        if (stored != null) {
            try {
                final int limit = Integer.parseInt(stored);
                return limit < 0 ? Integer.MAX_VALUE : limit;
            }
            catch (NumberFormatException e) {
                ConcurrentLog.logException(e);
            }
        }
        return Integer.MAX_VALUE;
    }

    /**
     * @return true only when the CRAWLING_Q attribute is stored as {@code true}
     */
    public boolean crawlingQ() {
        final String flag = this.get(CrawlAttribute.CRAWLING_Q.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true only when the FOLLOW_FRAMES attribute is stored as {@code true}
     */
    public boolean followFrames() {
        final String flag = this.get(CrawlAttribute.FOLLOW_FRAMES.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true only when the OBEY_HTML_ROBOTS_NOINDEX attribute is stored as {@code true}
     */
    public boolean obeyHtmlRobotsNoindex() {
        final String flag = this.get(CrawlAttribute.OBEY_HTML_ROBOTS_NOINDEX.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true only when the OBEY_HTML_ROBOTS_NOFOLLOW attribute is stored as {@code true}
     */
    public boolean obeyHtmlRobotsNofollow() {
        final String flag = this.get(CrawlAttribute.OBEY_HTML_ROBOTS_NOFOLLOW.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true when the INDEX_TEXT attribute is missing or stored as {@code true}
     */
    public boolean indexText() {
        final String flag = this.get(CrawlAttribute.INDEX_TEXT.key);
        return flag == null || flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true when the INDEX_MEDIA attribute is missing or stored as {@code true}
     */
    public boolean indexMedia() {
        final String flag = this.get(CrawlAttribute.INDEX_MEDIA.key);
        return flag == null || flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true when the NOINDEX_WHEN_CANONICAL_UNEQUAL_URL attribute is missing or
     *         stored as {@code true}
     */
    public boolean noindexWhenCanonicalUnequalURL() {
        final String flag = this.get(CrawlAttribute.NOINDEX_WHEN_CANONICAL_UNEQUAL_URL.key);
        return flag == null || flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true only when the STORE_HTCACHE attribute is stored as {@code true}
     */
    public boolean storeHTCache() {
        final String flag = this.get(CrawlAttribute.STORE_HTCACHE.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true only when the REMOTE_INDEXING attribute is stored as {@code true}
     */
    public boolean remoteIndexing() {
        final String flag = this.get(CrawlAttribute.REMOTE_INDEXING.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return the SNAPSHOTS_MAXDEPTH attribute as an int; -1 when it is missing,
     *         negative or not a number (a parse failure is logged)
     */
    public int snapshotMaxdepth() {
        final String stored = this.get(CrawlAttribute.SNAPSHOTS_MAXDEPTH.key);
        if (stored != null) {
            try {
                final int maxDepth = Integer.parseInt(stored);
                return maxDepth < 0 ? -1 : maxDepth;
            }
            catch (NumberFormatException e) {
                ConcurrentLog.logException(e);
            }
        }
        return -1;
    }

    /**
     * @return true only when the SNAPSHOTS_LOADIMAGE attribute is stored as {@code true}
     */
    public boolean snapshotLoadImage() {
        final String flag = this.get(CrawlAttribute.SNAPSHOTS_LOADIMAGE.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * @return true only when the SNAPSHOTS_REPLACEOLD attribute is stored as {@code true}
     */
    public boolean snapshotReplaceold() {
        final String flag = this.get(CrawlAttribute.SNAPSHOTS_REPLACEOLD.key);
        return flag != null && flag.equals(Boolean.TRUE.toString());
    }

    /**
     * Returns the compiled SNAPSHOTS_MUSTNOTMATCH filter, compiling it
     * case-insensitively on first use and caching the result.
     *
     * @return {@link #MATCH_ALL_PATTERN} when the attribute is missing or equals
     *         {@link #MATCH_ALL_STRING}; {@link #MATCH_NEVER_PATTERN} when it is not a
     *         valid regular expression
     */
    public Pattern snapshotsMustnotmatch() {
        if (this.snapshotsMustnotmatch != null) {
            return this.snapshotsMustnotmatch;
        }
        final String regex = this.get(CrawlAttribute.SNAPSHOTS_MUSTNOTMATCH.key);
        if (regex == null || MATCH_ALL_STRING.equals(regex)) {
            this.snapshotsMustnotmatch = MATCH_ALL_PATTERN;
        } else {
            try {
                this.snapshotsMustnotmatch = Pattern.compile(regex, Pattern.CASE_INSENSITIVE);
            }
            catch (PatternSyntaxException e) {
                this.snapshotsMustnotmatch = MATCH_NEVER_PATTERN;
            }
        }
        return this.snapshotsMustnotmatch;
    }

    /**
     * @return the TIMEZONEOFFSET attribute as an int; 0 when it is missing or not a
     *         number (a parse failure is not logged)
     */
    public int timezoneOffset() {
        final String stored = this.get(CrawlAttribute.TIMEZONEOFFSET.key);
        if (stored != null) {
            try {
                return Integer.parseInt(stored);
            }
            catch (NumberFormatException ignored) {
                // fall through to the default offset
            }
        }
        return 0;
    }

    /**
     * Computes the point in time that lies the given number of minutes before now.
     *
     * @param oldTimeMinutes age in minutes
     * @return the current time minus {@code oldTimeMinutes} minutes
     */
    public static Date getRecrawlDate(long oldTimeMinutes) {
        final long ageMillis = 60000L * oldTimeMinutes;
        final long now = System.currentTimeMillis();
        return new Date(now - ageMillis);
    }

    /**
     * Builds a regular expression that accepts smb, ftp, http and https URLs on any of
     * the hosts of the given URLs. A leading {@code "www."} is removed from each host
     * and the host is lower-cased and quoted; each alternative ends in {@code ".*"}.
     * <p>
     * URLs without a host are skipped. When no URL contributes a host, the host group
     * is closed empty, so the result is still a syntactically valid expression.
     *
     * @param urls2 the URLs whose hosts are accepted; must not be null
     * @return the filter expression
     */
    public static String siteFilter(Collection<? extends MultiProtocolURL> urls2) {
        final StringBuilder filter = new StringBuilder();
        filter.append("(smb|ftp|https?)://(www.)?(");
        boolean first = true;
        for (MultiProtocolURL multiProtocolURL : urls2) {
            String host = multiProtocolURL.getHost();
            if (host == null) {
                continue;
            }
            if (host.startsWith("www.")) {
                host = host.substring(4);
            }
            // Separate alternatives explicitly instead of overwriting a trailing '|',
            // which would destroy the opening parenthesis when no host was added.
            if (!first) {
                filter.append('|');
            }
            filter.append(Pattern.quote(host.toLowerCase(Locale.ROOT))).append(".*");
            first = false;
        }
        filter.append(')');
        return filter.toString();
    }

    /**
     * Builds a regular expression that accepts every URL on the host of the given URL.
     * A leading {@code "www."} is removed from the host, and the host is lower-cased
     * with {@code Locale.ROOT} and quoted, as {@code siteFilter} and
     * {@code mustMatchSubpath} do. For the http and https protocols the expression
     * accepts both.
     *
     * @param url the URL whose domain is accepted; must not be null
     * @return the filter expression; for a URL without host, its protocol followed by {@code ".*"}
     */
    public static String mustMatchFilterFullDomain(MultiProtocolURL url) {
        String host = url.getHost();
        if (host == null) {
            return url.getProtocol() + MATCH_ALL_STRING;
        }
        if (host.startsWith("www.")) {
            host = host.substring(4);
        }
        String protocol = url.getProtocol();
        if ("http".equals(protocol) || "https".equals(protocol)) {
            protocol = "https?+";
        }
        return new StringBuilder(host.length() + 20).append(protocol).append("://(www.)?").append(Pattern.quote(host.toLowerCase(Locale.ROOT))).append(MATCH_ALL_STRING).toString();
    }

    /**
     * Combines the sub-path filters of all given URLs into one alternation. Duplicate
     * filters are emitted once, in the order of first appearance.
     *
     * @param urls2 the URLs to build filters for; must not be null
     * @return the filters of {@link #mustMatchSubpath(MultiProtocolURL)} joined with
     *         {@code '|'}, or {@link #MATCH_ALL_STRING} when no URL is given
     */
    public static String subpathFilter(Collection<? extends MultiProtocolURL> urls2) {
        final LinkedHashSet<String> alternatives = new LinkedHashSet<String>();
        for (MultiProtocolURL url : urls2) {
            alternatives.add(CrawlProfile.mustMatchSubpath(url));
        }
        if (alternatives.isEmpty()) {
            return MATCH_ALL_STRING;
        }
        final StringBuilder joined = new StringBuilder();
        final Iterator<String> it = alternatives.iterator();
        joined.append(it.next());
        while (it.hasNext()) {
            joined.append('|').append(it.next());
        }
        return joined.toString();
    }

    /**
     * Builds a regular expression that accepts every URL below the path of the given
     * URL on the same host. A leading {@code "www."} is removed from the host, the host
     * is lower-cased and quoted, and the URL path is appended as is. For the http and
     * https protocols the expression accepts both.
     *
     * @param url the URL whose sub-path is accepted; must not be null
     * @return the filter expression; for a URL without host, its protocol followed by {@code ".*"}
     */
    public static String mustMatchSubpath(MultiProtocolURL url) {
        final String rawHost = url.getHost();
        if (rawHost == null) {
            return url.getProtocol() + MATCH_ALL_STRING;
        }
        final String host = rawHost.startsWith("www.") ? rawHost.substring(4) : rawHost;
        final String scheme = url.getProtocol();
        final boolean web = "http".equals(scheme) || "https".equals(scheme);
        final StringBuilder expression = new StringBuilder(host.length() + 20);
        expression.append(web ? "https?+" : scheme);
        expression.append("://(www.)?");
        expression.append(Pattern.quote(host.toLowerCase(Locale.ROOT)));
        expression.append(url.getPath());
        expression.append(MATCH_ALL_STRING);
        return expression.toString();
    }

    /**
     * @return true when the profile name starts with {@link #CRAWL_PROFILE_PUSH_STUB}
     */
    public boolean isPushCrawlProfile() {
        final String profileName = this.name();
        return profileName.startsWith(CRAWL_PROFILE_PUSH_STUB);
    }

    /**
     * Writes the attributes of this profile into the given property object.
     * Every property name is composed of the prefix, the entry number and an
     * attribute-specific suffix.
     *
     * @param CRAWL_PROFILE_PREFIX prefix prepended to every property name
     * @param prop target property object
     * @param active whether the profile is active; an active profile that is
     *        not one of {@code CrawlSwitchboard.DEFAULT_PROFILES} gets a
     *        terminate button (status 1), an inactive one a delete button
     *        (status 0), an active default profile neither (status 2)
     * @param dark written as {@code "1"} or {@code "0"} to the {@code _dark} property
     * @param count entry number appended to the prefix
     * @param domlistlength index at which the domain list is cut off; the
     *        entry at this index is written with a trailing {@code " ..."}
     */
    public void putProfileEntry(String CRAWL_PROFILE_PREFIX, serverObjects prop, boolean active, boolean dark, int count, int domlistlength) {
        final String prefix = CRAWL_PROFILE_PREFIX + count;
        final boolean terminateButton = active && !CrawlSwitchboard.DEFAULT_PROFILES.contains(this.name());
        final boolean deleteButton = !active;
        prop.put(prefix + "_status", terminateButton ? 1L : (deleteButton ? 0L : 2L));
        prop.put(prefix + "_terminateButton", terminateButton);
        prop.put(prefix + "_terminateButton_handle", this.handle());
        prop.put(prefix + "_deleteButton", deleteButton);
        prop.put(prefix + "_deleteButton_handle", this.handle());
        prop.put(prefix + "_dark", dark ? "1" : "0");
        prop.putXML(prefix + "_handle", this.handle());
        prop.putXML(prefix + "_name", this.name());
        prop.put(prefix + "_depth", this.depth());
        prop.put(prefix + "_directDocByURL", this.isIndexNonParseableUrls() ? 1L : 0L);
        prop.putXML(prefix + "_crawlerNoLimitURLMustMatch", this.get(CrawlAttribute.CRAWLER_URL_NODEPTHLIMITMATCH.key));
        prop.put(prefix + "_domMaxPages", this.domMaxPages());
        prop.put(prefix + "_crawlingQ", this.crawlingQ() ? 1L : 0L);
        prop.put(prefix + "_followFrames", this.followFrames() ? 1L : 0L);
        prop.put(prefix + "_obeyHtmlRobotsNoindex", this.obeyHtmlRobotsNoindex() ? 1L : 0L);
        prop.put(prefix + "_obeyHtmlRobotsNofollow", this.obeyHtmlRobotsNofollow() ? 1L : 0L);
        prop.put(prefix + "_crawlerAlwaysCheckMediaType", this.isCrawlerAlwaysCheckMediaType());
        prop.putXML(prefix + "_crawlerURLMustMatch", this.get(CrawlAttribute.CRAWLER_URL_MUSTMATCH.key));
        prop.putXML(prefix + "_crawlerURLMustNotMatch", this.get(CrawlAttribute.CRAWLER_URL_MUSTNOTMATCH.key));
        prop.putXML(prefix + "_crawlerOriginURLMustMatch", this.get(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTMATCH.key));
        prop.putXML(prefix + "_crawlerOriginURLMustNotMatch", this.get(CrawlAttribute.CRAWLER_ORIGIN_URL_MUSTNOTMATCH.key));
        prop.putXML(prefix + "_crawlerIPMustMatch", this.get(CrawlAttribute.CRAWLER_IP_MUSTMATCH.key));
        prop.putXML(prefix + "_crawlerIPMustNotMatch", this.get(CrawlAttribute.CRAWLER_IP_MUSTNOTMATCH.key));
        prop.putXML(prefix + "_crawlerCountryMustMatch", this.get(CrawlAttribute.CRAWLER_COUNTRY_MUSTMATCH.key));
        prop.putXML(prefix + "_indexURLMustMatch", this.get(CrawlAttribute.INDEXING_URL_MUSTMATCH.key));
        prop.putXML(prefix + "_indexURLMustNotMatch", this.get(CrawlAttribute.INDEXING_URL_MUSTNOTMATCH.key));
        prop.putXML(prefix + "_indexContentMustMatch", this.get(CrawlAttribute.INDEXING_CONTENT_MUSTMATCH.key));
        prop.putXML(prefix + "_indexContentMustNotMatch", this.get(CrawlAttribute.INDEXING_CONTENT_MUSTNOTMATCH.key));
        prop.put(prefix + "_" + CrawlAttribute.NOINDEX_WHEN_CANONICAL_UNEQUAL_URL.key, this.noindexWhenCanonicalUnequalURL() ? 1L : 0L);
        prop.putXML(prefix + "_" + CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTMATCH.key, this.get(CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTMATCH.key));
        prop.putXML(prefix + "_" + CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTNOTMATCH.key, this.get(CrawlAttribute.INDEXING_MEDIA_TYPE_MUSTNOTMATCH.key));
        prop.putXML(prefix + "_" + CrawlAttribute.INDEXING_SOLR_QUERY_MUSTMATCH.key, this.get(CrawlAttribute.INDEXING_SOLR_QUERY_MUSTMATCH.key));
        prop.putXML(prefix + "_" + CrawlAttribute.INDEXING_SOLR_QUERY_MUSTNOTMATCH.key, this.get(CrawlAttribute.INDEXING_SOLR_QUERY_MUSTNOTMATCH.key));
        final long recrawlIfOlder = this.recrawlIfOlder();
        prop.putXML(prefix + "_recrawlIfOlder", recrawlIfOlder == Long.MAX_VALUE ? "eternity" : new Date(recrawlIfOlder).toString());
        prop.put(prefix + "_storeHTCache", this.storeHTCache() ? 1L : 0L);
        prop.putXML(prefix + "_cacheStrategy", this.get(CrawlAttribute.CACHE_STRAGEGY.key));
        prop.putXML(prefix + "_agentName", this.get(CrawlAttribute.AGENT_NAME.key));
        prop.putXML(prefix + "_" + CrawlAttribute.SNAPSHOTS_MAXDEPTH.key, this.get(CrawlAttribute.SNAPSHOTS_MAXDEPTH.key));
        prop.putXML(prefix + "_" + CrawlAttribute.SNAPSHOTS_REPLACEOLD.key, this.get(CrawlAttribute.SNAPSHOTS_REPLACEOLD.key));
        prop.putXML(prefix + "_" + CrawlAttribute.SNAPSHOTS_MUSTNOTMATCH.key, this.get(CrawlAttribute.SNAPSHOTS_MUSTNOTMATCH.key));
        prop.putXML(prefix + "_" + CrawlAttribute.SNAPSHOTS_LOADIMAGE.key, this.get(CrawlAttribute.SNAPSHOTS_LOADIMAGE.key));
        prop.put(prefix + "_remoteIndexing", this.remoteIndexing() ? 1L : 0L);
        prop.put(prefix + "_indexText", this.indexText() ? 1L : 0L);
        prop.put(prefix + "_indexMedia", this.indexMedia() ? 1L : 0L);
        prop.putXML(prefix + "_" + CrawlAttribute.COLLECTIONS.key, this.get(CrawlAttribute.COLLECTIONS.key));
        prop.putXML(prefix + "_" + CrawlAttribute.DEFAULT_VALENCY.key, this.get(CrawlAttribute.DEFAULT_VALENCY.key));
        prop.putXML(prefix + "_" + CrawlAttribute.VALENCY_SWITCH_TAG_NAMES.key, this.get(CrawlAttribute.VALENCY_SWITCH_TAG_NAMES.key));
        prop.putXML(prefix + "_" + CrawlAttribute.TIMEZONEOFFSET.key, this.get(CrawlAttribute.TIMEZONEOFFSET.key));

        // Number of domain entries written below. It must start at 0 because
        // it is reported even when the domain list is skipped entirely.
        int i = 0;
        if (active && this.domMaxPages() > 0 && this.domMaxPages() != Integer.MAX_VALUE) {
            String item;
            // Stop at the first empty name or once the cut-off index was written.
            while (i <= domlistlength && !(item = this.domName(true, i)).isEmpty()) {
                if (i == domlistlength) {
                    // Last listed entry: mark that the list was truncated.
                    item = item + " ...";
                }
                prop.putHTML(prefix + "_crawlingDomFilterContent_" + i + "_item", item);
                ++i;
            }
        }
        prop.put(prefix + "_crawlingDomFilterContent", i);
    }

    /**
     * Small manual check: serializes a set of strings to a JSON array,
     * parses that text back and prints each intermediate result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Set<String> words = new HashSet<String>();
        words.add("eins");
        words.add("zwei");
        words.add("drei");

        // Serialize the set and show the JSON text.
        String serialized = new JSONArray(words).toString();
        System.out.println(serialized);

        JSONTokener tokener = new JSONTokener(serialized);
        try {
            // Parse the text back and rebuild a set from the array elements.
            JSONArray parsed = new JSONArray(tokener);
            System.out.println(parsed);
            Set<String> restored = new HashSet<String>();
            int index = 0;
            while (index < parsed.length()) {
                restored.add(parsed.getString(index));
                index++;
            }
            System.out.println(restored);
        } catch (JSONException e) {
            e.printStackTrace();
        }
    }

    /**
     * The attributes of a crawl profile. Each constant carries the key of
     * the attribute, a read-only flag, a value type tag ({@link #BOOLEAN},
     * {@link #INTEGER} or {@link #STRING}) and a human-readable label.
     */
    public enum CrawlAttribute {
        // The type tags are referenced by qualified name because they are
        // declared after the constants.
        HANDLE("handle", true, CrawlAttribute.STRING, "Profile Handle"),
        NAME("name", true, CrawlAttribute.STRING, "Name"),
        DEPTH("generalDepth", false, CrawlAttribute.INTEGER, "Crawl Depth"),
        DIRECT_DOC_BY_URL("directDocByURL", false, CrawlAttribute.BOOLEAN, "Put all linked urls into index without parsing"),
        CRAWLER_URL_NODEPTHLIMITMATCH("crawlerNoLimitURLMustMatch", false, CrawlAttribute.STRING, "URL No-Depth-Limit Must-Match Filter"),
        DOM_MAX_PAGES("domMaxPages", false, CrawlAttribute.INTEGER, "Domain Max. Pages"),
        CRAWLING_Q("crawlingQ", false, CrawlAttribute.BOOLEAN, "CrawlingQ / '?'-URLs"),
        FOLLOW_FRAMES("followFrames", false, CrawlAttribute.BOOLEAN, "Flag if frames shall be followed (no by default)"),
        OBEY_HTML_ROBOTS_NOINDEX("obeyHtmlRobotsNoindex", false, CrawlAttribute.BOOLEAN, "Obey html-robots-noindex"),
        OBEY_HTML_ROBOTS_NOFOLLOW("obeyHtmlRobotsNofollow", false, CrawlAttribute.BOOLEAN, "Obey html-robots-nofollow"),
        CRAWLER_ALWAYS_CHECK_MEDIA_TYPE("crawlerAlwaysCheckMediaType", false, CrawlAttribute.BOOLEAN, "Always cross check file extension against actual Media Type"),
        CRAWLER_URL_MUSTMATCH("crawlerURLMustMatch", false, CrawlAttribute.STRING, "URL Must-Match Filter"),
        CRAWLER_URL_MUSTNOTMATCH("crawlerURLMustNotMatch", false, CrawlAttribute.STRING, "URL Must-Not-Match Filter"),
        CRAWLER_ORIGIN_URL_MUSTMATCH("crawlerOriginURLMustMatch", false, CrawlAttribute.STRING, "Links Origin URL Must-Match Filter"),
        CRAWLER_ORIGIN_URL_MUSTNOTMATCH("crawlerOriginURLMustNotMatch", false, CrawlAttribute.STRING, "Links Origin URL Must-Not-Match Filter"),
        CRAWLER_IP_MUSTMATCH("crawlerIPMustMatch", false, CrawlAttribute.STRING, "IP Must-Match Filter"),
        CRAWLER_IP_MUSTNOTMATCH("crawlerIPMustNotMatch", false, CrawlAttribute.STRING, "IP Must-Not-Match Filter"),
        CRAWLER_COUNTRY_MUSTMATCH("crawlerCountryMustMatch", false, CrawlAttribute.STRING, "Country Must-Match Filter"),
        INDEXING_URL_MUSTMATCH("indexURLMustMatch", false, CrawlAttribute.STRING, "Indexing URL Must-Match Filter"),
        INDEXING_URL_MUSTNOTMATCH("indexURLMustNotMatch", false, CrawlAttribute.STRING, "Indexing URL Must-Not-Match Filter"),
        INDEXING_CONTENT_MUSTMATCH("indexContentMustMatch", false, CrawlAttribute.STRING, "Indexing Content Must-Match Filter"),
        INDEXING_CONTENT_MUSTNOTMATCH("indexContentMustNotMatch", false, CrawlAttribute.STRING, "Indexing Content Must-Not-Match Filter"),
        INDEXING_MEDIA_TYPE_MUSTMATCH("indexMediaTypeMustMatch", false, CrawlAttribute.STRING, "Indexing Media Type (MIME) Must-Match Filter"),
        INDEXING_MEDIA_TYPE_MUSTNOTMATCH("indexMediaTypeMustNotMatch", false, CrawlAttribute.STRING, "Indexing Media Type (MIME) Must-Not-Match Filter"),
        INDEXING_SOLR_QUERY_MUSTMATCH("indexSolrQueryMustMatch", false, CrawlAttribute.STRING, "Indexing Solr Query Must-Match Filter"),
        INDEXING_SOLR_QUERY_MUSTNOTMATCH("indexSolrQueryMustNotMatch", false, CrawlAttribute.STRING, "Indexing Solr Query Must-Not-Match Filter"),
        // NOTE(review): tagged STRING although the label reads like a flag - confirm against the accessor.
        NOINDEX_WHEN_CANONICAL_UNEQUAL_URL("noindexWhenCanonicalUnequalURL", false, CrawlAttribute.STRING, "No Indexing for Documents with Canonical != URL"),
        RECRAWL_IF_OLDER("recrawlIfOlder", false, CrawlAttribute.INTEGER, "Recrawl If Older"),
        STORE_HTCACHE("storeHTCache", false, CrawlAttribute.BOOLEAN, "Store in HTCache"),
        CACHE_STRAGEGY("cacheStrategy", false, CrawlAttribute.STRING, "Cache Strategy (NOCACHE,IFFRESH,IFEXIST,CACHEONLY)"),
        AGENT_NAME("agentName", false, CrawlAttribute.STRING, "User Agent Profile Name"),
        SNAPSHOTS_MAXDEPTH("snapshotsMaxDepth", false, CrawlAttribute.INTEGER, "Max Depth for Snapshots"),
        SNAPSHOTS_REPLACEOLD("snapshotsReplaceOld", false, CrawlAttribute.BOOLEAN, "Multiple Snapshot Versions - replace old with new"),
        SNAPSHOTS_MUSTNOTMATCH("snapshotsMustnotmatch", false, CrawlAttribute.STRING, "must-not-match filter for snapshot generation"),
        SNAPSHOTS_LOADIMAGE("snapshotsLoadImage", false, CrawlAttribute.BOOLEAN, "Flag for Snapshot image generation"),
        REMOTE_INDEXING("remoteIndexing", false, CrawlAttribute.BOOLEAN, "Remote Indexing (only for p2p networks)"),
        INDEX_TEXT("indexText", false, CrawlAttribute.BOOLEAN, "Index Text"),
        INDEX_MEDIA("indexMedia", false, CrawlAttribute.BOOLEAN, "Index Media"),
        COLLECTIONS("collections", false, CrawlAttribute.STRING, "Collections (comma-separated list)"),
        DEFAULT_VALENCY("default_valency", false, CrawlAttribute.STRING, "default tag valency"),
        VALENCY_SWITCH_TAG_NAMES("valency_switch_tag_names", false, CrawlAttribute.STRING, "DIV Class names when default valency shall be switched"),
        SCRAPER("scraper", false, CrawlAttribute.STRING, "Declaration for Vocabulary Scraper"),
        TIMEZONEOFFSET("timezoneOffset", true, CrawlAttribute.INTEGER, "Time Zone of Crawl Start Agent");

        /** Type tag for attributes holding a boolean value. */
        public static final int BOOLEAN = 0;
        /** Type tag for attributes holding an integer value. */
        public static final int INTEGER = 1;
        /** Type tag for attributes holding a string value. */
        public static final int STRING = 2;

        /** Key of the attribute; also the result of {@link #toString()}. */
        public final String key;
        /** Human-readable description of the attribute. */
        public final String label;
        /** Read-only flag given at declaration. */
        public final boolean readonly;
        /** One of {@link #BOOLEAN}, {@link #INTEGER} or {@link #STRING}. */
        public final int type;

        CrawlAttribute(String key, boolean readonly, int type, String label) {
            this.key = key;
            this.label = label;
            this.readonly = readonly;
            this.type = type;
        }

        /**
         * @return the key of this attribute
         */
        @Override
        public String toString() {
            return this.key;
        }
    }
}

