Correlate string data

1) Normalize data

Remove all special characters, and split the string into words if it contains more than one.

/**
 * Normalize a string for comparison, both as a whole and word by word.
 *
 * The input is trimmed, passed through minusculesansaccent() (a helper that is
 * not part of this snippet; presumably it lowercases and strips accents -
 * to be confirmed) and uppercased. Every character that is not an ASCII
 * letter or digit is then removed to build the full-string key.
 *
 * The same prepared string is split with parseData():
 *  - several words: each word is normalized recursively;
 *  - exactly one word that differs from the full-string key: it is stored as
 *    the only exploded entry;
 *  - otherwise "exploded" stays null.
 *
 * @param  string $str raw value to normalize
 * @return array  two keys:
 *                'uppercase' (string) the full-string key,
 *                'exploded'  (array|null) list of arrays with the same two keys, one per word
 */
function normalizeData(string $str): array {
    $str = strtoupper(minusculesansaccent(trim($str)));

    // Normalize the full string: a single pass that drops everything except
    // ASCII letters and digits (hyphens included). The string is already
    // uppercase at this point, so no further case conversion is needed.
    $strNormalized = (string) preg_replace('/[^A-Za-z0-9]/', '', $str);

    // Normalize word by word.
    $tabParsed = parseData($str);

    $exploded = null;
    if (count($tabParsed) > 1) {
        $exploded = [];
        foreach ($tabParsed as $strParsed) {
            $exploded[] = normalizeData($strParsed);
        }
    } elseif (isset($tabParsed[0]) && $tabParsed[0] !== $strNormalized) {
        // Strict comparison on purpose: with a loose "!=" PHP compares
        // numeric-looking strings as numbers ("0E1" == "0E11"), which would
        // wrongly hide a word that really differs from the full-string key.
        $exploded = [
            [
                'uppercase' => $tabParsed[0],
                'exploded' => null,
            ],
        ];
    }

    return [
        'uppercase' => $strNormalized,
        'exploded' => $exploded,
    ];
}
PHP

2) Parse data

Used by normalizeData() to split the string when it contains multiple words.

/**
 * Split a string into its significant, normalized words.
 *
 * The string is trimmed and uppercased, then split on spaces, underscores and
 * hyphens. Each piece is stripped of every character that is not an ASCII
 * letter or digit. Pieces that end up shorter than 3 characters, or that are
 * one of the stop words (UNE, ILE, ILES, AUX), are discarded.
 *
 * Filtering happens AFTER the punctuation is stripped, so that "ILE," is
 * recognized as the stop word "ILE" and a piece such as "!!!" does not leave
 * an empty string in the result.
 *
 * @param  string $str raw value to split
 * @return array  list of uppercase alphanumeric words, re-indexed from 0
 */
function parseData(string $str): array {
    $str = strtoupper(trim($str));
    $pieces = explode('-', str_replace([' ', '_'], '-', $str));

    // Words that carry no meaning for the correlation.
    $exceptions = ['UNE', 'ILE', 'ILES', 'AUX'];

    $words = [];
    foreach ($pieces as $piece) {
        // The piece is already uppercase and contains no hyphen (we split on it).
        $word = (string) preg_replace('/[^A-Za-z0-9]/', '', $piece);

        if (strlen($word) < 3 || in_array($word, $exceptions, true)) {
            continue;
        }

        $words[] = $word;
    }

    return $words;
}
PHP

3) Correlation

Find a correlation between two data strings.

/**
 * Estimate how strongly $needle correlates with $haystack.
 *
 * Both values are trimmed, uppercased and passed through normalizeData().
 * They are considered correlated when any of these holds, checked in order:
 *  1. the uppercased originals are identical, or the uppercased needle equals
 *     the haystack's normalized key (trusted);
 *  2. both normalized keys are identical (trusted);
 *  3. one exploded word of the haystack equals the needle's normalized key
 *     (less trusted);
 *  4. one exploded word of the needle equals the haystack's normalized key
 *     (less trusted).
 *
 * Whatever rule matched, the returned score is the similar_text() percentage
 * computed on the two uppercased, trimmed originals.
 *
 * All comparisons are strict: with a loose "==" PHP compares numeric-looking
 * strings as numbers, so distinct keys such as "0E12" and "0E34" would be
 * reported as a match.
 *
 * @param  string $needle   value searched for
 * @param  string $haystack value compared against
 * @return float|int similar_text() percentage on a match, 0 when nothing correlates
 */
private function correlatedData(string $needle, string $haystack) {
    // Normalize data
    $needle = strtoupper(trim($needle));
    $haystack = strtoupper(trim($haystack));
    $strNormalized = normalizeData($needle);
    $targetNormalized = normalizeData($haystack);

    $needleKey = $strNormalized['uppercase'];
    $targetKey = $targetNormalized['uppercase'];

    // 'exploded' is null when the value has no separate words to compare.
    $targetWords = array_column($targetNormalized['exploded'] ?? [], 'uppercase');
    $needleWords = array_column($strNormalized['exploded'] ?? [], 'uppercase');

    $correlated = $needle === $haystack                 // original strings (trusted)
        || $needle === $targetKey                       // original vs normalized target (trusted)
        || $needleKey === $targetKey                    // normalized strings (trusted)
        || in_array($needleKey, $targetWords, true)     // a word of the target (less trusted)
        || in_array($targetKey, $needleWords, true);    // a word of the needle (less trusted)

    if (!$correlated) {
        return 0;
    }

    // Only the percentage (third, by-reference argument) is of interest.
    similar_text($needle, $haystack, $conviction);

    return $conviction;
}
PHP

MarquandT

Ethical Hacker ~ Web Developer ~ Hosting Provider ~ Crypto Enthusiast ~ Automation Expert Bitcoin donation: 32Uu4NKGnxSPC7UukYXVyRHwbppbQpKVki

Comments

You must login to send a comment.