/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.document;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import net.yacy.cora.document.WordCache;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.language.synonyms.SynonymLibrary;
import net.yacy.cora.lod.vocabulary.Tagging;
import net.yacy.cora.order.NaturalOrder;
import net.yacy.document.LibraryProvider;
import net.yacy.document.SentenceReader;
import net.yacy.document.VocabularyScraper;
import net.yacy.document.WordTokenizer;
import net.yacy.kelondro.data.word.Word;
import net.yacy.kelondro.util.Bitfield;

/**
 * Splits a text into lower-cased words and collects statistics about them.
 *
 * <p>For every distinct word a {@link Word} entry is kept in {@link #words()}. In addition the
 * tokenizer collects synonyms for the found words (if the {@link SynonymLibrary} is not empty) and,
 * when auto-tagging is enabled, vocabulary tags, which are exposed through {@link #tags()}.
 *
 * <p>All work is done in the constructor; afterwards the instance only serves as a result holder.
 * Note that the constructor invokes the overridable methods
 * {@link #extendVocabularies(DigestURL, VocabularyScraper, Set)} and
 * {@link #extractAutoTagsFromText(String[], String, Set)}.
 */
public class Tokenizer {
    /** Not used inside this class. */
    public static final boolean pseudostemming = false;
    /** Words shorter than this number of characters are skipped by the tokenizer. */
    public static final int wordminsize = 2;
    /** Not used inside this class. */
    public static final int wordcut = 2;
    /** Index of the flag in {@link #RESULT_FLAGS} that is set when an "index of ... last modified" sequence is seen. */
    public static final int flag_cat_indexof = 0;
    // The following flag indexes are not used inside this class (presumably set by callers on RESULT_FLAGS).
    public static final int flag_cat_haslocation = 19;
    public static final int flag_cat_hasimage = 20;
    public static final int flag_cat_hasaudio = 21;
    public static final int flag_cat_hasvideo = 22;
    public static final int flag_cat_hasapp = 23;

    /** Distinct words of the text, ordered by {@link NaturalOrder#naturalComparator}. */
    protected final Map<String, Word> words;
    /** Synonyms of all words in {@link #words}, in the order in which they were collected. */
    private final Set<String> synonyms;
    /** Collected tags, keyed by vocabulary (navigator) name. */
    protected final Map<String, Set<Tagging.Metatag>> tags = new HashMap<String, Set<Tagging.Metatag>>();

    /** Number of counted words (including repetitions); -1 until the constructor has finished. */
    public int RESULT_NUMB_WORDS = -1;
    /** Number of counted sentences; -1 until the constructor has finished. */
    public int RESULT_NUMB_SENTENCES = -1;
    /** Flags collected while reading the text; a snapshot is copied into every newly created {@link Word}. */
    public Bitfield RESULT_FLAGS = new Bitfield(4);

    /**
     * Tokenizes the given text and fills the word, synonym and tag collections.
     *
     * @param root          URL of the document; handed to the scraper and used as the object link when a
     *                      scraped term is added to a vocabulary. {@link #getWords(String, WordCache)}
     *                      passes {@code null} together with a {@code null} scraper.
     * @param text          the text to tokenize; must not be {@code null} (checked by an assertion only)
     * @param meaningLib    word cache handed to the {@link WordTokenizer}
     * @param doAutotagging whether vocabulary tags shall be collected; ignored (treated as {@code false})
     *                      when the auto-tagging library is empty
     * @param scraper       optional source of scraped vocabulary terms for {@code root}; may be {@code null}
     */
    public Tokenizer(DigestURL root, String text, WordCache meaningLib, boolean doAutotagging, VocabularyScraper scraper) {
        this.words = new TreeMap<String, Word>(NaturalOrder.naturalComparator);
        this.synonyms = new LinkedHashSet<String>();
        assert (text != null);

        // Look-back buffer with the most recent words; it is used to build multi-word terms.
        // The size is clamped to zero so that a library limit of 0 or 1 words per term does not
        // produce a negative array size or a negative copy length further below.
        String[] wordcache = new String[Math.max(0, LibraryProvider.autotagging.getMaxWordsInTerm() - 1)];
        for (int i = 0; i < wordcache.length; ++i) {
            wordcache[i] = "";
        }

        int allwordcounter = 0;
        int allsentencecounter = 0;
        int wordInSentenceCounter = 1;
        boolean comb_indexof = false;
        boolean last_last = false;
        boolean last_index = false;
        if (LibraryProvider.autotagging.isEmpty()) {
            doAutotagging = false;
        }

        final WordTokenizer wordenum = new WordTokenizer(new SentenceReader(text), meaningLib);
        try {
            while (wordenum.hasMoreElements()) {
                String word = wordenum.nextElement().toString().toLowerCase(Locale.ENGLISH);

                // A single punctuation character terminates the current sentence.
                if (word.length() == 1 && SentenceReader.punctuation(word.charAt(0))) {
                    // Repeated punctuation without any word in between does not count as a sentence.
                    if (wordInSentenceCounter > 1) {
                        ++allsentencecounter;
                    }
                    wordInSentenceCounter = 1;
                    continue;
                }
                if (word.length() < wordminsize) {
                    continue;
                }

                if (doAutotagging) {
                    // NOTE(review): the name set is fetched again for every word because
                    // extendVocabularies() removes entries from it; do not hoist without checking
                    // whether getVocabularyNames() returns a copy.
                    Set<String> vocabularyNames = LibraryProvider.autotagging.getVocabularyNames();
                    this.extendVocabularies(root, scraper, vocabularyNames);
                    this.extractAutoTagsFromText(wordcache, word, vocabularyNames);
                }

                // Shift the look-back buffer by one and append the current word.
                if (wordcache.length > 0) {
                    System.arraycopy(wordcache, 1, wordcache, 0, wordcache.length - 1);
                    wordcache[wordcache.length - 1] = word;
                }

                // Detect "index of" followed (anywhere later) by "last modified".
                if (last_last && comb_indexof && word.equals("modified")) {
                    this.RESULT_FLAGS.set(flag_cat_indexof, true);
                    wordenum.pre(true); // semantics defined by WordTokenizer, not visible here
                }
                if (last_index && word.equals("of")) {
                    comb_indexof = true;
                }
                last_last = word.equals("last");
                last_index = word.equals("index");

                ++allwordcounter;
                Word wsp = this.words.get(word);
                if (wsp != null) {
                    wsp.inc();
                } else {
                    // First occurrence: the sentence counter is passed with an offset of 100
                    // (meaning of the arguments is defined by Word).
                    wsp = new Word(allwordcounter, wordInSentenceCounter, allsentencecounter + 100);
                    wsp.flags = this.RESULT_FLAGS.clone();
                    this.words.put(word, wsp);
                }
                ++wordInSentenceCounter;
            }
        } finally {
            wordenum.close();
        }

        if (SynonymLibrary.size() > 0) {
            for (String word : this.words.keySet()) {
                Set<String> syms = SynonymLibrary.getSynonyms(word);
                if (syms == null) {
                    continue;
                }
                this.synonyms.addAll(syms);
            }
        }

        this.RESULT_NUMB_WORDS = allwordcounter;
        // A trailing sentence without closing punctuation is counted as well.
        this.RESULT_NUMB_SENTENCES = allsentencecounter + (wordInSentenceCounter > 1 ? 1 : 0);
    }

    /**
     * Looks up tags for the current word and for every multi-word term that ends with it.
     *
     * <p>The candidate terms are {@code word} alone and {@code word} preceded by the last
     * 1..{@code wordcache.length} entries of {@code wordcache}, joined by single spaces and trimmed.
     * Every tag found is added to {@link #tags} under the tag's vocabulary name.
     *
     * @param wordcache       the most recent words preceding {@code word}, oldest first; unused slots
     *                        hold empty strings
     * @param word            the current word
     * @param vocabularyNames names of the vocabularies to search; nothing is done when empty
     */
    protected void extractAutoTagsFromText(String[] wordcache, String word, Set<String> vocabularyNames) {
        if (vocabularyNames.isEmpty()) {
            return;
        }
        for (int wordc = 1; wordc <= wordcache.length + 1; ++wordc) {
            // Build a term out of the last (wordc - 1) cached words followed by the current word.
            StringBuilder sb = new StringBuilder();
            for (int w = 0; w < wordc - 1; ++w) {
                sb.append(wordcache[wordcache.length - wordc + w + 1]).append(' ');
            }
            sb.append(word);
            // trim() removes the leading blanks produced by still-empty cache slots
            String testterm = sb.toString().trim();

            Tagging.Metatag tag = LibraryProvider.autotagging.getTagFromTerm(vocabularyNames, testterm);
            if (tag == null) {
                continue;
            }
            String navigatorName = tag.getVocabularyName();
            Set<Tagging.Metatag> tagset = this.tags.get(navigatorName);
            if (tagset == null) {
                tagset = new HashSet<Tagging.Metatag>();
                this.tags.put(navigatorName, tagset);
            }
            tagset.add(tag);
        }
    }

    /**
     * Applies the vocabulary terms which the scraper collected for {@code root}.
     *
     * <p>For every (vocabulary name, term) pair delivered by {@link VocabularyScraper#removeVocMap}
     * the vocabulary name is removed from {@code vocabularyNames}. If the vocabulary is known to the
     * auto-tagging library, the term is added to it when it has no object link yet, and the tag set
     * stored in {@link #tags} for that vocabulary is replaced by a set holding only the term's tag.
     *
     * @param root            document URL; key for the scraper and object link of newly added terms
     * @param scraper         source of the scraped terms; nothing is done when {@code null}
     * @param vocabularyNames set of vocabulary names; modified in place as described above
     */
    protected void extendVocabularies(DigestURL root, VocabularyScraper scraper, Set<String> vocabularyNames) {
        if (scraper == null) {
            return;
        }
        Map<String, String> vocMap = scraper.removeVocMap(root);
        if (vocMap == null || vocMap.isEmpty()) {
            return;
        }
        for (Map.Entry<String, String> entry : vocMap.entrySet()) {
            String navigatorName = entry.getKey();
            String term = entry.getValue();
            vocabularyNames.remove(navigatorName);
            Tagging vocabulary = LibraryProvider.autotagging.getVocabulary(navigatorName);
            if (vocabulary == null) {
                continue;
            }
            String obj = vocabulary.getObjectlink(term);
            if (obj == null) {
                try {
                    vocabulary.put(term, "", root.toNormalform(true));
                } catch (IOException ignored) {
                    // Best effort: a failure to store the new term must not abort tokenization;
                    // the tag for the term is still resolved below.
                    // NOTE(review): the failure is not reported anywhere - consider logging it.
                }
            }
            Tagging.Metatag tag = vocabulary.getMetatagFromTerm(term);
            Set<Tagging.Metatag> tagset = new HashSet<Tagging.Metatag>();
            tagset.add(tag);
            this.tags.put(navigatorName, tagset);
        }
    }

    /**
     * @return the internal (not copied) map from each distinct word to its {@link Word} entry
     */
    public Map<String, Word> words() {
        return this.words;
    }

    /**
     * Convenience method: tokenizes a text without auto-tagging and without a scraper.
     *
     * @param text       the text to tokenize
     * @param meaningLib word cache handed to the tokenizer
     * @return the word map of the text, or {@code null} if {@code text} is {@code null}
     */
    public static Map<String, Word> getWords(String text, WordCache meaningLib) {
        if (text == null) {
            return null;
        }
        return new Tokenizer(null, text, meaningLib, false, null).words();
    }

    /**
     * @return a new list with all collected synonyms, in collection order; changes to the list do
     *         not affect this tokenizer
     */
    public List<String> synonyms() {
        return new ArrayList<String>(this.synonyms);
    }

    /**
     * @return the internal (not copied) map from vocabulary name to the tags found for it
     */
    public Map<String, Set<Tagging.Metatag>> tags() {
        return this.tags;
    }
}

