Correlate string data

1) Normalize data

Remove all special characters, and split the string into words if it contains more than one.

/**
 * Normalize a string so it can be compared with another one.
 *
 * The input is trimmed, passed through minusculesansaccent() (defined
 * elsewhere; presumably lower-cases and strips accents - TODO confirm) and
 * upper-cased. Two views of the result are returned:
 *  - 'uppercase': the whole string reduced to ASCII letters and digits;
 *  - 'exploded' : one normalized entry per word returned by parseData(),
 *                 or null when the string has no word that differs from
 *                 the 'uppercase' value.
 *
 * @param  string $str Raw string to normalize
 * @return array  ['uppercase' => string, 'exploded' => array|null], where
 *                each 'exploded' entry has the same two keys
 */
function normalizeData(string $str): array {
	$str = strtoupper(minusculesansaccent(trim($str)));

	// Normalize the full string: keep ASCII letters and digits only
	// (separators such as "-", "_" and spaces are removed as well)
	$strNormalized = (string) preg_replace('/[^A-Za-z0-9]/', '', $str);

	// Normalize word by word
	$tabParsed = parseData($str);

	$exploded = null;
	if (count($tabParsed) > 1) {
		// Several words: each one gets its own normalized entry
		$exploded = [];
		foreach ($tabParsed as $strParsed) {
			$exploded[] = normalizeData($strParsed);
		}
	} elseif (isset($tabParsed[0]) && $tabParsed[0] !== $strNormalized) {
		// A single word survived the filtering and it is not the full string.
		// Strict comparison on purpose: with a loose "!=" PHP compares
		// numeric-looking strings as numbers ("100" == "100E0").
		$exploded = [
			[
				'uppercase' => $tabParsed[0],
				'exploded' => null
			]
		];
	}

	return [
		'uppercase' => $strNormalized,
		'exploded' => $exploded
	];
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34

2) Parse data

Used by normalizeData() to split the string when it contains multiple words

/**
 * Split a string into normalized, significant words.
 *
 * The string is trimmed and upper-cased, then split on spaces, underscores
 * and hyphens. Every word is reduced to ASCII letters and digits; words that
 * end up shorter than 3 characters, or that belong to the exception list,
 * are dropped.
 *
 * The filtering is applied AFTER the special characters are removed, so a
 * token made only of punctuation ("!!!") does not come back as an empty
 * word and a punctuated exception ("ILE.") is still recognized.
 *
 * @param  string $str Raw string to split
 * @return array  List of upper-cased words, re-indexed from 0
 */
function parseData(string $str): array {
	// Words ignored when splitting
	$exceptions = ["UNE", "ILE", "ILES", "AUX"];

	$str = strtoupper(trim($str));
	// Use a single separator so one explode() handles " ", "_" and "-"
	$str = str_replace([" ", "_"], "-", $str);

	$words = [];
	foreach (explode("-", $str) as $word) {
		// Keep ASCII letters and digits only
		$word = (string) preg_replace('/[^A-Za-z0-9]/', '', $word);

		if (strlen($word) < 3 || in_array($word, $exceptions, true)) {
			continue;
		}

		$words[] = $word;
	}

	return $words;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26

3) Correlation

Find a correlation between two data strings

/**
 * Tell how strongly $needle correlates with $haystack.
 *
 * Both values are trimmed, upper-cased and normalized with normalizeData().
 * They are considered correlated when any of the following holds:
 *  - the raw upper-cased strings are identical;
 *  - the raw needle equals the normalized haystack;
 *  - both normalized strings are identical;
 *  - the normalized needle equals one word of the haystack;
 *  - the normalized haystack equals one word of the needle.
 * The last two (word-level) matches are the weaker ones, but every match
 * is scored the same way: similar_text() on the raw upper-cased strings.
 *
 * All comparisons are strict: with a loose "==" PHP compares numeric-looking
 * strings as numbers, so "1E3" and "1000" would wrongly correlate.
 *
 * @param  string $needle   Value searched
 * @param  string $haystack Value it is compared with
 * @return float|int Similarity percentage computed by similar_text() when a
 *                   correlation is found, 0 otherwise
 */
private function correlatedData(string $needle, string $haystack) {
	// Normalize data
	$needle = strtoupper(trim($needle));
	$haystack = strtoupper(trim($haystack));
	$strNormalized = normalizeData($needle);
	$targetNormalized = normalizeData($haystack);

	$needleKey = $strNormalized['uppercase'];
	$targetKey = $targetNormalized['uppercase'];

	// 'exploded' is null when the string has no separate words
	$needleWords = array_column($strNormalized['exploded'] ?? [], 'uppercase');
	$targetWords = array_column($targetNormalized['exploded'] ?? [], 'uppercase');

	$correlated =
		// Original strings (trusted)
		$needle === $haystack
		|| $needle === $targetKey
		// Normalized strings (trusted)
		|| $needleKey === $targetKey
		// One word of the target matches the needle (not trusted)
		|| in_array($needleKey, $targetWords, true)
		// One word of the needle matches the target (not trusted)
		|| in_array($targetKey, $needleWords, true);

	if (!$correlated) {
		return 0;
	}

	// similar_text() writes the similarity percentage into $conviction
	similar_text($needle, $haystack, $conviction);

	return $conviction;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
Last Updated: 1/3/2020, 7:47:51 AM