/*
 * Decompiled with CFR 0.152.
 */
package ai.djl.modality.nlp.preprocess;

import ai.djl.modality.nlp.preprocess.TextProcessor;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * A {@link TextProcessor} that unifies hyphen-like characters inside tokens.
 *
 * <p>Every code point listed in the internal hyphen set is rewritten to the ASCII {@code '-'},
 * except the soft hyphen, which is removed from the text altogether.
 */
public class HyphenNormalizer
implements TextProcessor {
    /** Code point of the soft hyphen; it is stripped from the text instead of being replaced. */
    private static final int SOFT_HYPHEN = 0xAD;

    /** Code points treated as hyphen-like; the first two entries are ASCII '-' and '~'. */
    private static final Set<Integer> HYPHENS =
            new HashSet<>(
                    Arrays.asList(
                            0x2D, 0x7E, 0xAD, 0x58A, 0x5BE, 0x2010, 0x2011, 0x2012,
                            0x2013, 0x2014, 0x2015, 0x2053, 0x207B, 0x208B, 0x2212, 0x2E3A,
                            0x2E3B, 0x301C, 0x3030, 0xFE31, 0xFE32, 0xFE58, 0xFE63, 0xFF0D));

    /**
     * Tells whether a code point belongs to the set of hyphen-like characters.
     *
     * @param codePoint the Unicode code point to look up
     * @return {@code true} if the code point is a member of the hyphen set
     */
    public static boolean isHyphenLike(Integer codePoint) {
        return HYPHENS.contains(codePoint);
    }

    /**
     * Produces a copy of the given string with soft hyphens removed and every other hyphen-like
     * code point replaced by {@code '-'}. All remaining code points are copied unchanged.
     *
     * @param s2 the text to normalize
     * @return the normalized text
     */
    public static String normalizeHyphens(String s2) {
        final int length = s2.length();
        StringBuilder normalized = new StringBuilder(length);
        int index = 0;
        while (index < length) {
            int codePoint = s2.codePointAt(index);
            // Step by one or two chars so supplementary code points are consumed whole.
            index += Character.charCount(codePoint);
            // The soft hyphen is also in HYPHENS, so it has to be filtered out first.
            if (codePoint == SOFT_HYPHEN) {
                continue;
            }
            if (isHyphenLike(codePoint)) {
                normalized.append('-');
            } else {
                normalized.appendCodePoint(codePoint);
            }
        }
        return normalized.toString();
    }

    /**
     * Applies {@link #normalizeHyphens(String)} to each token.
     *
     * @param tokens the tokens to normalize
     * @return a new list holding the normalized tokens in the same order
     */
    @Override
    public List<String> preprocess(List<String> tokens) {
        return tokens.stream()
                .map(token -> normalizeHyphens(token))
                .collect(Collectors.toList());
    }
}

