/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.crawler;

import java.io.IOException;
import java.net.MalformedURLException;
import java.time.LocalDateTime;
import java.util.Date;
import java.util.HashSet;
import java.util.Set;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.solr.connector.SolrConnector;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.util.ConcurrentLog;
import net.yacy.crawler.data.CrawlProfile;
import net.yacy.crawler.data.NoticedURL;
import net.yacy.crawler.retrieval.Request;
import net.yacy.document.parser.html.TagValency;
import net.yacy.kelondro.workflow.AbstractBusyThread;
import net.yacy.search.Switchboard;
import net.yacy.search.schema.CollectionSchema;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;

public class RecrawlBusyThread
extends AbstractBusyThread {
    public static final String THREAD_NAME = "recrawlindex";
    public static final String DEFAULT_QUERY = CollectionSchema.fresh_date_dt.getSolrFieldName() + ":[* TO NOW/DAY-1DAY]";
    public static final boolean DEFAULT_INCLUDE_FAILED = false;
    public static final boolean DEFAULT_DELETE_ON_RECRAWL = false;
    private String currentQuery;
    private boolean includefailed;
    private boolean deleteOnRecrawl;
    private int chunkstart = 0;
    private final int chunksize = 100;
    private final Switchboard sb;
    private final Set<DigestURL> urlstack;
    private long urlsToRecrawl = 0L;
    private long recrawledUrlsCount = 0L;
    private long rejectedUrlsCount = 0L;
    private long malformedUrlsCount = 0L;
    private long malformedUrlsDeletedCount = 0L;
    private final String solrSortBy;
    private boolean moreToRecrawl = true;
    private boolean terminatedBySolrFailure = false;
    private LocalDateTime startTime;
    private LocalDateTime endTime;

    public RecrawlBusyThread(Switchboard xsb, String query2, boolean includeFailed, boolean deleteOnRecrawl) {
        super(3000L, 1000L);
        this.setName(THREAD_NAME);
        this.setIdleSleep(600000L);
        this.setBusySleep(120000L);
        this.setPriority(1);
        this.setLoadPreReqisite(1.0);
        this.sb = xsb;
        this.currentQuery = query2;
        this.includefailed = includeFailed;
        this.deleteOnRecrawl = deleteOnRecrawl;
        this.urlstack = new HashSet<DigestURL>();
        this.solrSortBy = CollectionSchema.load_date_dt.getSolrFieldName() + " asc";
        SolrConnector solrConnector = this.sb.index.fulltext().getDefaultConnector();
        if (solrConnector != null && !solrConnector.isClosed()) {
            solrConnector.commit(true);
        }
    }

    public void setQuery(String q, boolean includefailedurls, boolean deleteOnRecrawl) {
        this.currentQuery = q;
        this.includefailed = includefailedurls;
        this.deleteOnRecrawl = deleteOnRecrawl;
        this.chunkstart = 0;
    }

    public String getQuery() {
        return this.currentQuery;
    }

    public static final String buildSelectionQuery(String queryBase, boolean includeFailed) {
        return includeFailed ? queryBase : queryBase + " AND (" + CollectionSchema.httpstatus_i.name() + ":200)";
    }

    public void setIncludeFailed(boolean includefailedurls) {
        this.includefailed = includefailedurls;
    }

    public boolean getIncludeFailed() {
        return this.includefailed;
    }

    public void setDeleteOnRecrawl(boolean deleteOnRecrawl) {
        this.deleteOnRecrawl = deleteOnRecrawl;
    }

    public boolean getDeleteOnRecrawl() {
        return this.deleteOnRecrawl;
    }

    private boolean feedToCrawler() {
        int added = 0;
        if (!this.urlstack.isEmpty()) {
            CrawlProfile profile2 = this.sb.crawler.defaultRecrawlJobProfile;
            for (DigestURL url : this.urlstack) {
                Request request = new Request(ASCII.getBytes(this.sb.peers.mySeed().hash), url, null, "", new Date(), profile2.handle(), 0, profile2.timezoneOffset());
                String acceptedError = this.sb.crawlStacker.checkAcceptanceChangeable(url, profile2, 0);
                if (!this.includefailed && acceptedError == null) {
                    acceptedError = this.sb.crawlStacker.checkAcceptanceInitially(url, profile2);
                }
                if (acceptedError != null) {
                    ++this.rejectedUrlsCount;
                    ConcurrentLog.info(THREAD_NAME, "addToCrawler: cannot load " + url.toNormalform(true) + ": " + acceptedError);
                    continue;
                }
                String s = this.sb.crawlQueues.noticeURL.push(NoticedURL.StackType.LOCAL, request, profile2, this.sb.robots);
                if (s != null) {
                    ++this.rejectedUrlsCount;
                    ConcurrentLog.info(THREAD_NAME, "addToCrawler: failed to add " + url.toNormalform(true) + ": " + s);
                    continue;
                }
                ++added;
                ++this.recrawledUrlsCount;
            }
            this.urlstack.clear();
        }
        return added > 0;
    }

    @Override
    public boolean job() {
        if (this.sb.crawlQueues.coreCrawlJobSize() > this.chunksize) {
            return false;
        }
        boolean didSomething = false;
        if (this.urlstack.isEmpty()) {
            if (!this.moreToRecrawl) {
                this.terminate(false);
            } else {
                this.moreToRecrawl = this.processSingleQuery();
                didSomething = true;
            }
        } else {
            didSomething = this.feedToCrawler();
        }
        return didSomething;
    }

    @Override
    public synchronized void start() {
        this.startTime = LocalDateTime.now();
        super.start();
    }

    @Override
    public void terminate(boolean waitFor) {
        super.terminate(waitFor);
        this.endTime = LocalDateTime.now();
    }

    private boolean processSingleQuery() {
        if (!this.urlstack.isEmpty()) {
            return true;
        }
        SolrDocumentList docList = null;
        SolrConnector solrConnector = this.sb.index.fulltext().getDefaultConnector();
        if (solrConnector == null || solrConnector.isClosed()) {
            this.urlsToRecrawl = 0L;
            this.terminatedBySolrFailure = true;
            return false;
        }
        try {
            docList = solrConnector.getDocumentListByQuery(RecrawlBusyThread.buildSelectionQuery(this.currentQuery, this.includefailed), this.solrSortBy, this.chunkstart, this.chunksize, CollectionSchema.id.getSolrFieldName(), CollectionSchema.sku.getSolrFieldName());
            this.urlsToRecrawl = docList.getNumFound();
        }
        catch (Throwable e) {
            this.urlsToRecrawl = 0L;
            this.terminatedBySolrFailure = true;
        }
        if (docList != null) {
            HashSet<String> tobedeletedIDs = new HashSet<String>();
            for (SolrDocument doc : docList) {
                try {
                    this.urlstack.add(new DigestURL((String)doc.getFieldValue(CollectionSchema.sku.getSolrFieldName())));
                    if (!this.deleteOnRecrawl) continue;
                    tobedeletedIDs.add((String)doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
                }
                catch (MalformedURLException ex) {
                    ++this.malformedUrlsCount;
                    tobedeletedIDs.add((String)doc.getFieldValue(CollectionSchema.id.getSolrFieldName()));
                    ++this.malformedUrlsDeletedCount;
                    ConcurrentLog.severe(THREAD_NAME, "deleted index document with invalid url " + (String)doc.getFieldValue(CollectionSchema.sku.getSolrFieldName()));
                }
            }
            if (!tobedeletedIDs.isEmpty()) {
                try {
                    solrConnector.deleteByIds(tobedeletedIDs);
                    solrConnector.commit(false);
                }
                catch (IOException e) {
                    ConcurrentLog.severe(THREAD_NAME, "error deleting IDs ", e);
                }
            }
            int n = this.chunkstart = this.deleteOnRecrawl ? 0 : this.chunkstart + this.chunksize;
        }
        return docList != null && docList.size() >= this.chunksize;
    }

    public static CrawlProfile buildDefaultCrawlProfile() {
        CrawlProfile profile2 = new CrawlProfile("recrawlJob", ".*", "", ".*", "", "", "", ".*", "", ".*", "", false, 0, false, CrawlProfile.getRecrawlDate(60L), -1, true, true, true, false, true, true, true, false, -1, false, true, "", CacheStrategy.IFFRESH, "robot_recrawlJob", "YaCy Internet (cautious)", TagValency.EVAL, null, null, 0);
        return profile2;
    }

    @Override
    public int getJobCount() {
        return this.urlstack.size();
    }

    public long getUrlsToRecrawl() {
        return this.urlsToRecrawl;
    }

    public long getRecrawledUrlsCount() {
        return this.recrawledUrlsCount;
    }

    public long getRejectedUrlsCount() {
        return this.rejectedUrlsCount;
    }

    public long getMalformedUrlsCount() {
        return this.malformedUrlsCount;
    }

    public long getMalformedUrlsDeletedCount() {
        return this.malformedUrlsDeletedCount;
    }

    public boolean isTerminatedBySolrFailure() {
        return this.terminatedBySolrFailure;
    }

    public LocalDateTime getStartTime() {
        return this.startTime;
    }

    public LocalDateTime getEndTime() {
        return this.endTime;
    }

    @Override
    public void freemem() {
        this.urlstack.clear();
    }
}

