/*
 * Decompiled with CFR 0.152.
 */
package net.yacy.document.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import net.yacy.cora.document.id.DigestURL;
import net.yacy.cora.util.CommonPattern;
import net.yacy.document.AbstractParser;
import net.yacy.document.Document;
import net.yacy.document.Parser;
import net.yacy.document.VocabularyScraper;

/**
 * Parser for delimiter-separated text files registered under the "csv" extension.
 *
 * <p>The separator (comma, semicolon or tab) is guessed from the first non-empty
 * line. All cells of all rows are then concatenated into one plain text that is
 * handed to a single {@link Document}.
 */
public class csvParser extends AbstractParser implements Parser {

    /** Registers this parser for the {@code csv} file extension. */
    public csvParser() {
        super("Comma Separated Value Parser");
        this.SUPPORTED_EXTENSIONS.add("csv");
    }

    /**
     * Reads the whole stream as a table and wraps its flattened text in one document.
     *
     * <p>Every row is rendered as its cells joined by blanks and terminated by a
     * dot; the rendered rows are joined by blanks to form the document text. The
     * rendering of the first row is additionally passed as a single-element list
     * (presumably the title list - confirm against the Document constructor).
     *
     * @param location       URL of the parsed resource
     * @param mimeType       mime type, passed through to the document
     * @param charset        name of the character set used to decode {@code source};
     *                       when {@code null} or unsupported the platform default is used
     * @param scraper        not used by this parser
     * @param timezoneOffset not used by this parser
     * @param source         the raw content; it is not closed by this method
     * @return an array holding exactly one document
     * @throws Parser.Failure if the stream does not contain any non-empty line
     * @throws InterruptedException declared by the overridden signature; not thrown here directly
     */
    @Override
    public Document[] parse(DigestURL location, String mimeType, String charset, VocabularyScraper scraper, int timezoneOffset, InputStream source) throws Parser.Failure, InterruptedException {
        List<String[]> table = getTable(charset, source);
        if (table.isEmpty()) {
            throw new Parser.Failure("document has no lines", location);
        }
        StringBuilder sb = new StringBuilder();
        for (String[] row : table) {
            sb.append(concatRow(row)).append(' ');
        }
        return new Document[]{new Document(location, mimeType, charset, this, null, null, singleList(concatRow(table.get(0))), null, "", null, null, 0.0, 0.0, sb.toString(), null, null, null, false, new Date())};
    }

    /**
     * Joins the cells of one row with single blanks and appends a trailing dot.
     *
     * @param columns the cells of one row
     * @return the cells separated by blanks, followed by {@code '.'}
     */
    private static String concatRow(String[] columns) {
        StringBuilder sb = new StringBuilder(80);
        for (String column : columns) {
            if (sb.length() > 0) {
                sb.append(' ');
            }
            sb.append(column);
        }
        sb.append('.');
        return sb.toString();
    }

    /**
     * Reads all non-empty lines from the stream and splits them into cells.
     *
     * <p>The separator is detected once, on the first non-empty line, and reused
     * for all following lines. Quote characters ({@code "} and {@code '}) are
     * removed and separators enclosed by a quote pair are replaced by blanks
     * before the line is split.
     *
     * <p>Reading is best-effort: if an {@link IOException} occurs, the rows read
     * so far are returned. The reader is not closed because that would also close
     * {@code source}, which was not opened by this method.
     *
     * @param charset name of the character set; {@code null} or an unsupported
     *                name selects the platform default
     * @param source  the raw content
     * @return the rows in reading order, possibly empty, never {@code null}
     */
    private static List<String[]> getTable(String charset, InputStream source) {
        List<String[]> rows = new ArrayList<String[]>();
        BufferedReader reader = openReader(charset, source);
        String separator = null;
        try {
            String row;
            while ((row = reader.readLine()) != null) {
                row = row.trim();
                if (row.isEmpty()) {
                    continue;
                }
                if (separator == null) {
                    separator = detectSeparator(row);
                }
                char sep = separator.charAt(0);
                row = stripQuotes(row, '"', sep, ' ');
                row = stripQuotes(row, '\'', sep, ' ');
                rows.add(row.split(separator));
            }
        } catch (IOException ignored) {
            // best-effort: keep whatever rows were read before the failure
        }
        return rows;
    }

    /**
     * Creates a buffered reader for the stream using the given charset name.
     * Falls back to the platform default charset if the name is {@code null}
     * (which InputStreamReader would reject with a NullPointerException) or
     * not supported.
     */
    private static BufferedReader openReader(String charset, InputStream source) {
        if (charset != null) {
            try {
                return new BufferedReader(new InputStreamReader(source, charset));
            } catch (UnsupportedEncodingException ignored) {
                // unknown charset name: fall through to the platform default
            }
        }
        return new BufferedReader(new InputStreamReader(source));
    }

    /**
     * Picks the separator that splits the given line into the most parts.
     * On a tie the precedence is tab, then semicolon, then comma.
     *
     * @param row a non-empty line
     * @return {@code "\t"}, {@code ";"} or {@code ","}; never {@code null}
     */
    private static String detectSeparator(String row) {
        int commas = CommonPattern.COMMA.split(row).length;
        int semicolons = CommonPattern.SEMICOLON.split(row).length;
        int tabs = CommonPattern.TAB.split(row).length;
        if (tabs >= semicolons && tabs >= commas) {
            return "\t";
        } else if (semicolons >= commas && semicolons >= tabs) {
            return ";";
        } else {
            // neither tab nor semicolon yields the maximum, so comma does
            return ",";
        }
    }

    /**
     * Removes quote characters from a line and neutralizes separators inside quotes.
     *
     * <p>For every pair of quote characters both quotes are dropped and each
     * {@code separator} between them is replaced by {@code replacement}. A
     * trailing quote without a partner is dropped and processing stops.
     *
     * @param line        the line to process
     * @param quote       the quote character to remove
     * @param separator   the cell separator to replace inside quoted sections
     * @param replacement the character substituted for enclosed separators
     * @return the line without any {@code quote} character
     */
    private static String stripQuotes(String line, char quote, char separator, char replacement) {
        String ret = line;
        int p;
        while ((p = ret.indexOf(quote)) >= 0) {
            int q = ret.indexOf(quote, p + 1);
            if (q < 0) {
                // unbalanced quote: drop it, nothing more to do
                return ret.substring(0, p) + ret.substring(p + 1);
            }
            ret = ret.substring(0, p) + ret.substring(p + 1, q).replace(separator, replacement) + ret.substring(q + 1);
        }
        return ret;
    }
}

