/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.document;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import net.yacy.cora.bayes.BayesClassifier;
import net.yacy.cora.bayes.Classification;
import net.yacy.cora.util.ConcurrentLog;

/**
 * Static registry of text classifiers, one per "context".
 *
 * <p>A context is a sub-directory of the directory passed to
 * {@link #initialize(File)}. Inside a context directory every {@code *.txt}
 * file whose name starts with {@code negative} supplies the negative example
 * lines, and every other {@code *.txt} file supplies the example lines of one
 * category (the category name is the file name without the {@code .txt}
 * suffix).
 *
 * <p>The registry is a plain {@link HashMap} without any synchronization.
 */
public class ProbabilisticClassifier {
    /** Name reported for text that belongs to none of the trained categories. */
    public static final String NONE_CATEGORY_NAME = "NONE";
    /** Shared category instance carrying {@link #NONE_CATEGORY_NAME}. */
    public static final Category NONE_CATEGORY = new Category(NONE_CATEGORY_NAME);
    /** All loaded contexts, keyed by context (directory) name. */
    private static final Map<String, Context> contexts = new HashMap<>();

    /**
     * Returns the names of all loaded contexts.
     *
     * @return the key set of the internal registry; this is a live view, not a copy
     */
    public static Set<String> getContextNames() {
        return contexts.keySet();
    }

    /**
     * Looks up a loaded context by name.
     *
     * @param contextName name of the context (its directory name)
     * @return the context, or {@code null} if no context with that name is loaded
     */
    public static Context getContext(String contextName) {
        return contexts.get(contextName);
    }

    /**
     * Discards all loaded contexts and reloads them from the given directory.
     *
     * <p>A sub-directory becomes a context only if it contains a negative
     * example file and at least one category file. A context whose files
     * cannot be read is logged and skipped; the remaining contexts are still
     * loaded.
     *
     * @param path_to_context_directory directory whose sub-directories are the contexts
     */
    public static void initialize(File path_to_context_directory) {
        contexts.clear();
        String[] context_candidates = path_to_context_directory.list();
        if (context_candidates == null) {
            // File.list() yields null when the path is not a directory or cannot be read
            return;
        }
        for (String context_candidate : context_candidates) {
            File ccf = new File(path_to_context_directory, context_candidate);
            if (!ccf.isDirectory()) {
                continue;
            }
            String[] category_candidates = ccf.list();
            if (category_candidates == null) {
                // directory vanished or became unreadable between isDirectory() and list()
                continue;
            }
            Map<String, File> categoryExampleLinesFiles = new HashMap<>();
            File negativeExampleLines = null;
            for (String category_candidate : category_candidates) {
                if (!category_candidate.endsWith(".txt")) {
                    continue;
                }
                File catcf = new File(ccf, category_candidate);
                if (category_candidate.startsWith("negative")) {
                    negativeExampleLines = catcf;
                    continue;
                }
                // strip the ".txt" suffix to obtain the category name
                String categoryName = category_candidate.substring(0, category_candidate.length() - 4);
                categoryExampleLinesFiles.put(categoryName, catcf);
            }
            if (negativeExampleLines == null || categoryExampleLinesFiles.isEmpty()) {
                continue;
            }
            try {
                Context context = new Context(context_candidate, categoryExampleLinesFiles, negativeExampleLines);
                contexts.put(context_candidate, context);
            }
            catch (IOException e) {
                ConcurrentLog.logException(e);
            }
        }
    }

    /**
     * Classifies a text in every loaded context.
     *
     * @param text the text to classify
     * @return a new map from context name to the name of the category chosen in that context
     */
    public static Map<String, String> getClassification(String text) {
        Map<String, String> c = new HashMap<>();
        for (Context context : contexts.values()) {
            Classification<String, Category> classification = context.classify(text);
            Category category = classification == null ? null : classification.getCategory();
            // Defensive: report "no category" instead of failing with a
            // NullPointerException should the classifier yield no result.
            String categoryname = category == null ? NONE_CATEGORY_NAME : category.getName();
            c.put(context.getName(), categoryname);
        }
        return c;
    }

    /**
     * One trained classifier together with the name of the context it belongs to.
     */
    public static class Context {
        /** Every character that is not a regex word character separates tokens. */
        private static final Pattern TOKEN_SEPARATOR = Pattern.compile("\\W");

        private final String context_name;
        private final BayesClassifier<String, Category> bayes;

        /**
         * Reads all example files and trains the classifier with them.
         *
         * @param context_name name of this context
         * @param categoryExampleLinesFiles category name mapped to the file holding its example lines
         * @param negativeExampleLines file holding the example lines for the "NONE" category
         * @throws IOException if one of the files cannot be read as UTF-8 text
         */
        public Context(String context_name, Map<String, File> categoryExampleLinesFiles, File negativeExampleLines) throws IOException {
            this.context_name = context_name;
            int requiredSize = 0;
            Charset charset = StandardCharsets.UTF_8;
            Map<String, List<String>> categoryBuffer = new HashMap<>();
            for (Map.Entry<String, File> category : categoryExampleLinesFiles.entrySet()) {
                List<String> lines = Files.readAllLines(category.getValue().toPath(), charset);
                categoryBuffer.put(category.getKey(), lines);
                requiredSize += lines.size();
            }
            // Read the negative examples once, so the capacity below and the
            // learned lines are guaranteed to come from the same file content.
            List<String> negativeLines = Files.readAllLines(negativeExampleLines.toPath(), charset);
            categoryBuffer.put(ProbabilisticClassifier.NONE_CATEGORY_NAME, negativeLines);
            requiredSize += negativeLines.size();

            this.bayes = new BayesClassifier<>();
            // capacity is the total number of example lines over all files
            this.bayes.setMemoryCapacity(requiredSize);
            for (Map.Entry<String, List<String>> category : categoryBuffer.entrySet()) {
                Category c = new Category(category.getKey());
                for (String line : category.getValue()) {
                    List<String> tokens = this.normalize(line);
                    this.bayes.learn(c, tokens);
                }
            }
            // NOTE(review): this teaches the shared NONE_CATEGORY instance the raw,
            // un-tokenized negative lines, in addition to the tokenized lines learned
            // above under a separate Category("NONE") object. Kept as-is to preserve
            // the classifier's behavior; confirm whether this is intended.
            this.bayes.learn(NONE_CATEGORY, negativeLines);
        }

        /**
         * Splits a phrase into lower-case word tokens.
         *
         * <p>The phrase is lower-cased with {@link Locale#ROOT} so the result does
         * not depend on the JVM default locale, then split at every non-word
         * character. Tokens of two characters or fewer are dropped.
         *
         * @param phrase the text to tokenize
         * @return the remaining tokens in their original order
         */
        private List<String> normalize(String phrase) {
            String lowered = phrase.toLowerCase(Locale.ROOT);
            List<String> tokens = new ArrayList<>();
            // Splitting directly at each non-word character yields the same tokens as
            // replacing those characters with blanks and splitting at whitespace.
            for (String token : TOKEN_SEPARATOR.split(lowered)) {
                if (token.length() <= 2) {
                    continue;
                }
                tokens.add(token);
            }
            return tokens;
        }

        /**
         * @return the name of this context
         */
        public String getName() {
            return this.context_name;
        }

        /**
         * Classifies a phrase with this context's classifier.
         *
         * @param phrase the text to classify; it is tokenized the same way as the training lines
         * @return the classification produced by the underlying classifier
         */
        public Classification<String, Category> classify(String phrase) {
            List<String> words = this.normalize(phrase);
            return this.bayes.classify(words);
        }
    }

    /**
     * A named category. The class does not override {@code equals} or
     * {@code hashCode}, so two instances with the same name are distinct objects.
     */
    public static class Category {
        String category_name;

        /**
         * @param category_name the name of the category
         */
        public Category(String category_name) {
            this.category_name = category_name;
        }

        /**
         * @return the name of this category
         */
        public String getName() {
            return this.category_name;
        }
    }
}

