/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.search.snippet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.regex.Pattern;
import net.yacy.cora.document.encoding.ASCII;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.federate.yacy.CacheStrategy;
import net.yacy.cora.order.Base64Order;
import net.yacy.cora.protocol.ClientIdentification;
import net.yacy.cora.storage.ARC;
import net.yacy.cora.storage.ConcurrentARC;
import net.yacy.cora.storage.HandleSet;
import net.yacy.cora.util.ByteArray;
import net.yacy.cora.util.ByteBuffer;
import net.yacy.crawler.retrieval.Request;
import net.yacy.crawler.retrieval.Response;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.SentenceReader;
import net.yacy.document.SnippetExtractor;
import net.yacy.document.WordTokenizer;
import net.yacy.document.parser.html.CharacterCoding;
import net.yacy.kelondro.data.meta.URIMetadataNode;
import net.yacy.kelondro.data.word.Word;
import net.yacy.peers.RemoteSearch;
import net.yacy.repository.Blacklist;
import net.yacy.repository.LoaderDispatcher;
import net.yacy.search.Switchboard;
import net.yacy.search.query.QueryGoal;
import net.yacy.search.snippet.TextSnippetStatistics;

public class TextSnippet
implements Comparable<TextSnippet>,
Comparator<TextSnippet> {
    // Capacity constant for the snippet cache.
    // NOTE(review): not referenced in the visible code; Cache below is built with the
    // literal 1000 - presumably the constant was inlined by the compiler/decompiler.
    private static final int MAX_CACHE = 1000;
    // Matches a string that starts with a character that is neither letter nor digit,
    // followed by at least one more character (used to peel off leading punctuation).
    private static final Pattern p1 = Pattern.compile("\\A[^\\p{L}\\p{N}].+");
    // Matches a string of at least two characters that ends with a character that is
    // neither letter nor digit (used to peel off trailing punctuation).
    private static final Pattern p2 = Pattern.compile(".+[^\\p{L}\\p{N}]\\Z");
    // Matches a string that begins with letters/digits, contains a non-alphanumeric
    // character, and ends with a digits + '.' or ',' + digit group (e.g. "price:12.5").
    private static final Pattern p3 = Pattern.compile("\\A([\\p{L}\\p{N}]+[^\\p{L}\\p{N}].+)([\\p{N}]+[.,][\\p{N}])+\\Z");
    // Matches any single character that is neither letter nor digit.
    private static final Pattern p4 = Pattern.compile("[^\\p{L}\\p{N}]");
    // Shared cache of computed snippet lines, keyed by url hash + word hashes (see Cache).
    public static final Cache snippetsCache = new Cache();
    // Shared statistics collector; init() reports every constructed snippet to it.
    public static final TextSnippetStatistics statistics = new TextSnippetStatistics();
    // Hash of the URL this snippet belongs to; basis for compareTo() and hashCode().
    private byte[] urlhash;
    // The snippet text, or null when no snippet could be produced.
    private String line;
    // Whether the line already carries <b>...</b> highlighting (see descriptionline()).
    private boolean isMarked;
    // Error description, or null when there is none.
    private String error;
    // Origin of the snippet or the failure class.
    private ResultClass resultStatus;
    // Word separator used by getLineMarked(): runs of space, '|' or '-'.
    private static final Pattern SPLIT_PATTERN = Pattern.compile("[ |-]+");
    // Lazily computed result of descriptionline(); null until first requested.
    private String descriptionline = null;
    // Lazily computed hash code; Integer.MIN_VALUE means "not computed yet".
    private int hashCache = Integer.MIN_VALUE;

    public TextSnippet(DigestURL url, String line, boolean isMarked, ResultClass errorCode, String errortext) {
        long beginTime = System.currentTimeMillis();
        this.init(url, line, isMarked, errorCode, errortext, beginTime);
    }

    public TextSnippet(LoaderDispatcher loader, URIMetadataNode row, Set<String> queryTerms, HandleSet queryhashes, CacheStrategy cacheStrategy, boolean pre, int snippetMaxLength, boolean reindexing) {
        SnippetExtractor tsr;
        String wordhashes;
        long beginTime = System.currentTimeMillis();
        DigestURL url = row.url();
        if (queryTerms.isEmpty()) {
            this.init(url, null, false, ResultClass.ERROR_NO_TERM_GIVEN, "no query terms given", beginTime);
            return;
        }
        ResultClass source = ResultClass.SOURCE_CACHE;
        String urlHash = ASCII.String(url.hash());
        if (queryhashes != null) {
            wordhashes = RemoteSearch.set2string(queryhashes);
            String snippetLine = snippetsCache.get(wordhashes, urlHash);
            if (snippetLine != null) {
                this.init(url, snippetLine, false, source, null, beginTime);
                return;
            }
        } else {
            wordhashes = null;
        }
        String textline = null;
        Set<String> remainingTerms = new HashSet<String>(queryTerms);
        SentenceReader sentences = null;
        ArrayList<StringBuilder> firstSentencesList = null;
        TextSnippet.removeMatchingTerms(row.url().toTokens(), remainingTerms);
        TextSnippet.removeMatchingTerms(row.dc_title(), remainingTerms);
        TextSnippet.removeMatchingTerms(row.dc_creator(), remainingTerms);
        TextSnippet.removeMatchingTerms(row.dc_subject(), remainingTerms);
        if (!remainingTerms.isEmpty()) {
            String solrText;
            ArrayList<String> solrdesc = row.getDescription();
            if (!solrdesc.isEmpty()) {
                firstSentencesList = new ArrayList<StringBuilder>();
                for (String s : solrdesc) {
                    firstSentencesList.add(new StringBuilder(s));
                }
            }
            if ((solrText = row.getText()) != null && solrText.length() > 0) {
                sentences = new SentenceReader(firstSentencesList, solrText, pre);
            } else if (net.yacy.crawler.data.Cache.has(url.hash())) {
                Response response;
                Request request = loader == null ? null : loader.request(url, true, reindexing);
                try {
                    response = loader == null || request == null ? null : loader.load(request, CacheStrategy.CACHEONLY, Blacklist.BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent);
                }
                catch (IOException e1) {
                    response = null;
                }
                Document document = null;
                if (response != null) {
                    try {
                        document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
                        sentences = new SentenceReader(firstSentencesList, document.getTextString(), pre);
                        response = null;
                        document = null;
                    }
                    catch (Parser.Failure failure) {
                        // empty catch block
                    }
                }
            }
            if (sentences == null) {
                this.init(url, null, false, ResultClass.SOURCE_METADATA, null, beginTime);
                return;
            }
            if (sentences.iterator().hasNext()) {
                try {
                    tsr = new SnippetExtractor(sentences, remainingTerms, snippetMaxLength);
                    textline = tsr.getSnippet();
                    remainingTerms = tsr.getRemainingTerms();
                }
                catch (UnsupportedOperationException e) {
                    this.init(url, null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
                    return;
                }
            }
        }
        if (remainingTerms.isEmpty()) {
            if (textline == null || textline.length() == 0) {
                if (sentences == null) {
                    String solrText = row.getText();
                    if (solrText != null && solrText.length() > 0) {
                        sentences = new SentenceReader(firstSentencesList, solrText, pre);
                    }
                } else {
                    sentences.reset();
                }
                if (sentences == null || !sentences.iterator().hasNext()) {
                    textline = row.dc_subject();
                } else {
                    String h1s;
                    List<String> h1 = row.h1();
                    if (h1 != null && h1.size() > 0 && (h1s = h1.get(0)).length() > 0) {
                        Object prevSentence = null;
                        for (StringBuilder sentence : sentences) {
                            String currentSentence = sentence.toString();
                            if (prevSentence != null && ((String)prevSentence).startsWith(h1s)) {
                                textline = currentSentence;
                                break;
                            }
                            prevSentence = currentSentence;
                        }
                    }
                    if (textline == null) {
                        sentences.reset();
                        StringBuilder s = new StringBuilder(snippetMaxLength);
                        for (StringBuilder t : sentences) {
                            s.append((CharSequence)t).append(' ');
                            if (s.length() < snippetMaxLength / 4 * 3) continue;
                            break;
                        }
                        if (s.length() > snippetMaxLength) {
                            s.setLength(snippetMaxLength);
                            s.trimToSize();
                        }
                        textline = s.toString();
                    }
                }
            }
            this.init(url, textline.length() > 0 ? textline : this.line, false, ResultClass.SOURCE_METADATA, null, beginTime);
            return;
        }
        sentences = null;
        Response response = null;
        try {
            response = loader == null ? null : loader.load(loader.request(url, true, reindexing), url.isFile() || url.isSMB() ? CacheStrategy.NOCACHE : (cacheStrategy == null ? CacheStrategy.CACHEONLY : cacheStrategy), Blacklist.BlacklistType.SEARCH, ClientIdentification.yacyIntranetCrawlerAgent);
        }
        catch (IOException e) {
            response = null;
        }
        if (response == null) {
            if (cacheStrategy == null || cacheStrategy.mustBeOffline()) {
                this.init(url, null, false, ResultClass.ERROR_SOURCE_LOADING, "omitted network load (not allowed), no cache entry", beginTime);
                return;
            }
            this.init(url, null, false, ResultClass.ERROR_RESOURCE_LOADING, "error loading resource from net, no cache entry", beginTime);
            return;
        }
        if (!response.fromCache()) {
            Switchboard.getSwitchboard().toIndexer(response);
            source = ResultClass.SOURCE_WEB;
        }
        Document document = null;
        try {
            document = Document.mergeDocuments(response.url(), response.getMimeType(), response.parse());
        }
        catch (Parser.Failure e) {
            this.init(url, null, false, ResultClass.ERROR_PARSER_FAILED, e.getMessage(), beginTime);
            return;
        }
        if (document == null) {
            this.init(url, null, false, ResultClass.ERROR_PARSER_FAILED, "parser error/failed", beginTime);
            return;
        }
        sentences = new SentenceReader(document.getTextString(), pre);
        document.close();
        if (!sentences.hasNext()) {
            this.init(url, null, false, ResultClass.ERROR_PARSER_NO_LINES, "parser returned no sentences", beginTime);
            return;
        }
        try {
            tsr = new SnippetExtractor(sentences, remainingTerms, snippetMaxLength);
            textline = tsr.getSnippet();
            remainingTerms = tsr.getRemainingTerms();
        }
        catch (UnsupportedOperationException e) {
            this.init(url, null, false, ResultClass.ERROR_NO_MATCH, "snippet extractor failed:" + e.getMessage(), beginTime);
            return;
        }
        sentences = null;
        if (textline == null || !remainingTerms.isEmpty()) {
            this.init(url, null, false, ResultClass.ERROR_NO_MATCH, "no matching snippet found", beginTime);
            return;
        }
        if (textline.length() > snippetMaxLength) {
            textline = textline.substring(0, snippetMaxLength);
        }
        if (wordhashes != null) {
            snippetsCache.put(wordhashes, urlHash, textline);
        }
        this.init(url, textline, false, source, null, beginTime);
    }

    /**
     * Stores the snippet state and reports the computation to {@link #statistics}.
     *
     * @param url       the URL the snippet belongs to; its hash is kept
     * @param line      the snippet text, may be {@code null}
     * @param isMarked  whether {@code line} already contains highlighting markup
     * @param errorCode the source or failure class
     * @param errortext an error description, may be {@code null}
     * @param beginTime start of the computation in milliseconds, used for the elapsed time
     */
    private void init(DigestURL url, String line, boolean isMarked, ResultClass errorCode, String errortext, long beginTime) {
        this.urlhash = url.hash();
        this.line = line;
        this.isMarked = isMarked;
        this.resultStatus = errorCode;
        this.error = errortext;
        final long elapsedMillis = System.currentTimeMillis() - beginTime;
        statistics.addTextSnippetStatistics(url, elapsedMillis, this.resultStatus);
    }

    /**
     * @return {@code true} if a snippet line is present
     */
    public boolean exists() {
        if (this.line == null) {
            return false;
        }
        return true;
    }

    /**
     * @return the marked flag this snippet was created with
     */
    public boolean isMarked() {
        final boolean marked = this.isMarked;
        return marked;
    }

    /**
     * @return the snippet line as stored, or an empty string if there is none
     */
    public String getLineRaw() {
        if (this.line != null) {
            return this.line;
        }
        return "";
    }

    /**
     * @return the trimmed error text, or an empty string if there is none
     */
    public String getError() {
        if (this.error != null) {
            return this.error.trim();
        }
        return "";
    }

    /**
     * @return the source or failure class recorded for this snippet
     */
    public ResultClass getErrorCode() {
        final ResultClass status = this.resultStatus;
        return status;
    }

    /**
     * Returns the snippet line with every query word wrapped in {@code <b>...</b>}.
     * <p>
     * The line is split at runs of space, '|' and '-'; each word is marked individually
     * and the words are joined again with single spaces. A single trailing '.' is left
     * out of the result. The stored line itself is not modified, so repeated calls and
     * {@link #getLineRaw()} keep returning the same text.
     *
     * @param queryGoal supplies the hashes of the words to highlight
     * @return the marked line; an empty string if there is no line; the trimmed line if
     *         there are no query hashes
     */
    public String getLineMarked(QueryGoal queryGoal) {
        final HandleSet queryHashes = queryGoal.getIncludeHashes();
        if (this.line == null) {
            return "";
        }
        if (queryHashes == null || queryHashes.isEmpty()) {
            return this.line.trim();
        }

        // work on a local copy: an accessor must not alter the stored line
        String text = this.line;
        if (text.endsWith(".")) {
            text = text.substring(0, text.length() - 1);
        }

        final Set<byte[]> queryHashesSet = new HashSet<byte[]>();
        final Iterator<byte[]> hashes = queryHashes.iterator();
        while (hashes.hasNext()) {
            queryHashesSet.add(hashes.next());
        }

        final StringBuilder marked = new StringBuilder(text.length() + queryHashes.size() * 8);
        for (final String word : SPLIT_PATTERN.split(text)) {
            marked.append(TextSnippet.getWordMarked(word, queryHashesSet)).append(' ');
        }
        return marked.toString().trim();
    }

    /**
     * Returns the line to display, computing it on the first call and reusing it afterwards.
     * <p>
     * A snippet created as marked is html-encoded and its encoded {@code <b>} tags are
     * restored; otherwise the line is marked with {@link #getLineMarked(QueryGoal)}.
     *
     * @param queryGoal supplies the query word hashes for marking
     * @return the display line
     */
    public String descriptionline(QueryGoal queryGoal) {
        if (this.descriptionline == null) {
            if (this.isMarked) {
                final String encoded = CharacterCoding.unicode2html(this.getLineRaw(), false);
                this.descriptionline = encoded.replaceAll("&lt;b&gt;(.+?)&lt;/b&gt;", "<b>$1</b>");
            } else {
                this.descriptionline = this.getLineMarked(queryGoal);
            }
        }
        return this.descriptionline;
    }

    /**
     * Orders snippets by their url hash using {@code Base64Order.enhancedCoder}.
     */
    @Override
    public int compareTo(TextSnippet o) {
        final byte[] mine = this.urlhash;
        final byte[] theirs = o.urlhash;
        return Base64Order.enhancedCoder.compare(mine, theirs);
    }

    /**
     * Comparator form of {@link #compareTo(TextSnippet)}.
     */
    @Override
    public int compare(TextSnippet o1, TextSnippet o2) {
        final int order = o1.compareTo(o2);
        return order;
    }

    /**
     * Hash code derived from the url hash; computed on first use and then cached.
     * {@code Integer.MIN_VALUE} in the cache field marks "not computed yet".
     */
    @Override
    public int hashCode() {
        int hash = this.hashCache;
        if (hash == Integer.MIN_VALUE) {
            hash = ByteArray.hashCode(this.urlhash);
            this.hashCache = hash;
        }
        return hash;
    }

    /**
     * @return the snippet line, or an empty string if there is none
     */
    @Override
    public String toString() {
        if (this.line != null) {
            return this.line;
        }
        return "";
    }

    /**
     * Wraps a word in {@code <b>...</b>} if its hash is one of the query hashes.
     * <p>
     * Leading and trailing non-alphanumeric characters are split off first and re-attached
     * (html-encoded) afterwards. A word that still contains non-alphanumeric characters and
     * ends in a number such as {@code 12.5} is processed piece by piece: each alphanumeric
     * run is checked against the query hashes on its own and the separators are kept.
     *
     * @param word        the word to mark
     * @param queryHashes hashes of the query words
     * @return the word, marked where it matches a query hash
     */
    private static String getWordMarked(String word, Set<byte[]> queryHashes) {
        final StringBuilder theWord = new StringBuilder(word);
        final StringBuilder prefix = new StringBuilder(40);
        final StringBuilder postfix = new StringBuilder(40);

        // move leading non-alphanumeric characters into the prefix
        while (p1.matcher(theWord).find()) {
            prefix.append(theWord.charAt(0));
            theWord.deleteCharAt(0);
        }

        // move trailing non-alphanumeric characters into the postfix
        while (p2.matcher(theWord).find()) {
            final int last = theWord.length() - 1;
            postfix.insert(0, theWord.charAt(last));
            theWord.deleteCharAt(last);
        }

        if (p3.matcher(theWord).find()) {
            // The output buffer must live across the whole loop. Re-creating it for every
            // character dropped everything except the final piece of the word.
            final StringBuilder out = new StringBuilder(80);
            String temp = "";
            final int length = theWord.length();
            for (int k = 0; k < length; ++k) {
                final String current = theWord.substring(k, k + 1);
                final boolean special = p4.matcher(current).find();
                if (!special) {
                    temp = temp + current;
                }
                if (special || k == length - 1) {
                    // a separator or the end of the word closes the current piece
                    out.append(TextSnippet.emphasizeIfQueried(temp, queryHashes));
                    temp = "";
                }
                if (special) {
                    out.append(CharacterCoding.unicode2html(current, false));
                }
            }
            theWord.setLength(0);
            theWord.append(out);
        } else if (ByteBuffer.contains(queryHashes, Word.word2hash(theWord))) {
            theWord.replace(0, theWord.length(), CharacterCoding.unicode2html(theWord.toString(), false));
            theWord.insert(0, "<b>");
            theWord.append("</b>");
        }

        theWord.insert(0, CharacterCoding.unicode2html(prefix.toString(), false));
        theWord.append(CharacterCoding.unicode2html(postfix.toString(), false));
        return theWord.toString();
    }

    /**
     * Returns the html-encoded piece wrapped in {@code <b>...</b>} if its hash is a query
     * hash, otherwise the piece unchanged.
     */
    private static String emphasizeIfQueried(String piece, Set<byte[]> queryHashes) {
        if (ByteBuffer.contains(queryHashes, Word.word2hash(piece))) {
            return "<b>" + CharacterCoding.unicode2html(piece, false) + "</b>";
        }
        return piece;
    }

    /**
     * Removes from {@code queryTerms} every term that occurs as a word in {@code sentence}.
     * Does nothing when there are no terms left.
     *
     * @param sentence   the text to tokenize
     * @param queryTerms the set of still unmatched terms; modified in place
     */
    private static void removeMatchingTerms(String sentence, Set<String> queryTerms) {
        if (!queryTerms.isEmpty()) {
            final Set<String> wordsInSentence = WordTokenizer.tokenizeSentence(sentence, 100).keySet();
            queryTerms.removeAll(wordsInSentence);
        }
    }

    /**
     * Classifies where a snippet came from ({@code SOURCE_*}) or why none could be
     * produced ({@code ERROR_*}). Each constant carries a flag telling whether it
     * denotes a failure.
     */
    public enum ResultClass {
        SOURCE_SOLR(false),
        SOURCE_CACHE(false),
        SOURCE_FILE(false),
        SOURCE_WEB(false),
        SOURCE_METADATA(false),
        ERROR_NO_TERM_GIVEN(true),
        ERROR_SOURCE_LOADING(true),
        ERROR_RESOURCE_LOADING(true),
        ERROR_PARSER_FAILED(true),
        ERROR_PARSER_NO_LINES(true),
        ERROR_NO_MATCH(true);

        /** Whether this constant denotes a failure. */
        private final boolean fail;

        ResultClass(boolean fail) {
            this.fail = fail;
        }

        /**
         * @return {@code true} for the {@code ERROR_*} constants, {@code false} for the
         *         {@code SOURCE_*} constants
         */
        public boolean fail() {
            return this.fail;
        }
    }

    /**
     * Cache of snippet lines backed by a {@link ConcurrentARC}. Entries are keyed by the
     * url hash followed by the word hashes string.
     */
    public static class Cache {
        private final ARC<String, String> cache = new ConcurrentARC<String, String>(1000, Math.min(32, 2 * Runtime.getRuntime().availableProcessors()));

        /** Builds the cache key: url hash first, then the word hashes. */
        private static String keyOf(String wordhashes, String urlhash) {
            return urlhash + wordhashes;
        }

        /**
         * Stores a snippet unless an entry for the same key is already present.
         */
        public void put(String wordhashes, String urlhash, String snippet) {
            this.cache.insertIfAbsent(keyOf(wordhashes, urlhash), snippet);
        }

        /**
         * @return whatever the underlying cache holds for this url and word hashes
         */
        public String get(String wordhashes, String urlhash) {
            return this.cache.get(keyOf(wordhashes, urlhash));
        }

        /**
         * @return whether the underlying cache has an entry for this url and word hashes
         */
        public boolean contains(String wordhashes, String urlhash) {
            return this.cache.containsKey(keyOf(wordhashes, urlhash));
        }
    }
}

