<?php

/**
 * This script parses a wiki text file containing network information and generates an SQL script
 * to populate the IpNetwork table with a complete, hierarchical representation of the network.
 *
 * It includes on-the-fly data cleaning, hierarchical network detection from headers, robust parent-child logic,
 * duplicate prevention, IP address validation, and error logging to 'initial_data_errors.txt'.
 *
 * @version 7.0
 * @author Gemini
 */

// --- Configuration ---
$inputFile = 'combined_wiki.txt';        // Wiki export that is parsed
$outputFile = 'initial_data.sql';        // Generated SQL script
$dbTableName = 'IpNetwork';              // Target table named in the generated statements
$errorFile = 'initial_data_errors.txt';  // Log of issues found while parsing/generating

// --- Main Execution ---

// Collects human-readable messages from both the parser and the SQL generator.
$parsing_errors = [];

// Remove the log of a previous run so a clean run leaves no stale error file behind.
if (file_exists($errorFile) && !unlink($errorFile)) {
    echo "Warning: could not remove old error log '$errorFile'.\n";
}

// Read and parse the wiki file into a structured PHP array.
$networkData = parseWikiFile($inputFile, $parsing_errors);

// Generate the SQL script from the parsed data.
$sqlScript = generateSqlScript($networkData, $dbTableName, $parsing_errors);

// Save the generated SQL script; only claim success when the write really happened.
if (file_put_contents($outputFile, $sqlScript) === false) {
    echo "Error: could not write SQL script to '$outputFile'.\n";
    exit(1);
}

echo "Final SQL script was successfully generated in '$outputFile'\n";

// Save errors if any were found.
if (!empty($parsing_errors)) {
    $error_log_content = "Parsing process found the following issues:\n\n";
    $error_log_content .= implode("\n", $parsing_errors);

    if (file_put_contents($errorFile, $error_log_content) === false) {
        echo "Error: could not write error log to '$errorFile'.\n";
        exit(1);
    }

    echo "Found " . count($parsing_errors) . " issues during processing. See $errorFile for details.\n";
}

/**
 * Parses the wiki text file into a flat list of unique, valid IPv4 network/host entries.
 *
 * Each line is cleaned (HTML entities decoded, backslashes removed, non-breaking spaces and
 * whitespace runs normalised, mailto links reduced to their label) and then classified:
 *
 *  - Lines starting with "=====" are section headers. A "START <ip>[/<cidr>|-<cidr>] <text>" header
 *    may define a network block itself; a header mentioning "gelöschte IP Netze" switches the
 *    parser into ignore mode until the next header.
 *  - Lines containing "<td" contribute every IPv4 address found in a table cell as a /32 host.
 *  - Lines starting with "<ip>/<cidr>" or "<ip>" become network or host entries.
 *  - Any other non-empty line is buffered and prepended to the description of the next entry.
 *
 * Invalid addresses, out-of-range prefixes, IPv6 networks and duplicates are skipped and
 * reported through $errors. /32 entries whose description marks them as free/reserved are
 * dropped without a message.
 *
 * The script terminates via die() when the input file is missing or unreadable.
 *
 * @param string $filename The path to the wiki text file.
 * @param array  &$errors  Receives one message per skipped or suspicious line.
 * @return array<int, array{ip: string, cidr: int, name: string, description: string}>
 */
function parseWikiFile(string $filename, array &$errors): array
{
    if (!file_exists($filename)) {
        die("Error: Input file '$filename' not found.\n");
    }

    $lines = file($filename, FILE_IGNORE_NEW_LINES);
    if ($lines === false) {
        // file() signals an unreadable file with false; iterating over it would only emit a warning.
        die("Error: Input file '$filename' could not be read.\n");
    }

    $entries = [];
    $seen_networks = [];       // "ip/cidr" => true, used for duplicate detection
    $pending_description = ''; // free text collected since the last entry
    $ignore_section = false;

    foreach ($lines as $line_number => $line) {
        $display_line = $line_number + 1; // messages use 1-based line numbers

        // 1. Clean up the line from multiple inconsistencies.
        $cleaned_line = html_entity_decode($line, ENT_QUOTES | ENT_HTML5);
        // The non-breaking space is spelled as an escape so it cannot silently turn into a plain space.
        $cleaned_line = str_replace(['\\', "\u{00A0}"], ['', ' '], $cleaned_line);
        $cleaned_line = preg_replace('/\[([^\]]+)\]\(mailto:[^\)]+\)/', '$1', $cleaned_line);
        $cleaned_line = trim(preg_replace('/\s+/', ' ', $cleaned_line));

        // 2. Section headers (START, END, etc.).
        if (str_starts_with($cleaned_line, '=====')) {
            $pending_description = ''; // never carry text across sections
            $ignore_section = (stripos($cleaned_line, 'gelöschte IP Netze') !== false);

            if (!$ignore_section && preg_match('/^===== START\s+([0-9\.]+)(?:[\/-](\d{1,2}))?\s*(.*?)(?:\.md)?\s*=====$/', $cleaned_line, $header_matches)) {
                $ip_from_header = $header_matches[1];
                // An optional group that did not participate is reported as '' (not null)
                // because a later group matched, so the empty string must mean "no prefix".
                $cidr_text = $header_matches[2] ?? '';
                $cidr_from_header = ($cidr_text === '') ? null : (int)$cidr_text;
                $desc_from_header = trim($header_matches[3]);

                // Heuristic: If CIDR is missing but IP ends in .0, assume it's a /24 network block.
                if ($cidr_from_header === null && str_ends_with($ip_from_header, '.0')) {
                    $cidr_from_header = 24;
                    if ($desc_from_header === '' || strtolower($desc_from_header) === 'linknetze') {
                        $desc_from_header = "Network Block {$ip_from_header}";
                    }
                }

                // If a full network/CIDR is defined or inferred from the header, add it as a distinct entry.
                if ($cidr_from_header !== null && filter_var($ip_from_header, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
                    if ($cidr_from_header > 32) {
                        $errors[] = "Line {$display_line}: Invalid CIDR prefix '/{$cidr_from_header}' in section header for '$ip_from_header'. Skipping.";
                    } else {
                        $network_key = "$ip_from_header/$cidr_from_header";
                        if (!isset($seen_networks[$network_key])) {
                            $entries[] = [
                                'ip' => $ip_from_header,
                                'cidr' => $cidr_from_header,
                                'name' => $desc_from_header,
                                'description' => $desc_from_header,
                            ];
                            $seen_networks[$network_key] = true;
                        }
                    }
                }
            }

            continue; // Header processed, move to next line
        }

        // Blank lines end a pending description; ignored sections are skipped entirely.
        if ($ignore_section || $cleaned_line === '') {
            if ($cleaned_line === '') {
                $pending_description = '';
            }
            continue;
        }

        // 3. HTML table rows: every IPv4 address in a cell becomes a /32 host.
        if (str_contains($line, '<td')) {
            preg_match_all('/<td[^>]*>([\d\.]+)<\/td>/', $line, $ip_matches);
            foreach ($ip_matches[1] as $ip_from_table) {
                if (!filter_var($ip_from_table, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
                    continue;
                }

                // Table hosts take part in duplicate tracking like every other entry.
                $network_key = "$ip_from_table/32";
                if (isset($seen_networks[$network_key])) {
                    $errors[] = "Line {$display_line}: Duplicate entry in wiki file: '$network_key'. Skipping.";
                    continue;
                }

                $seen_networks[$network_key] = true;
                $entries[] = ['ip' => $ip_from_table, 'cidr' => 32, 'name' => 'CGNAT Host', 'description' => 'CGNAT Host from table'];
            }
            $pending_description = '';
            continue;
        }

        // 4. Try to match IP patterns.
        $is_ip_entry = false; // true when the line was consumed as (or rejected as) an address line
        $entry_data = null;

        if (preg_match('/^([0-9\.]+)\/(\d+)\s*(.*)$/', $cleaned_line, $matches)) {
            $ip = $matches[1];
            $cidr = (int)$matches[2];

            if (!filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
                $errors[] = "Line {$display_line}: Invalid IPv4 address in network definition: '$ip'. Skipping.";
                $is_ip_entry = true;
            } elseif ($cidr > 32) {
                // An IPv4 prefix longer than 32 bits cannot be represented in the generated SQL.
                $errors[] = "Line {$display_line}: Invalid CIDR prefix '/{$matches[2]}' for network '$ip'. Skipping.";
                $is_ip_entry = true;
            } else {
                $entry_data = ['ip' => $ip, 'cidr' => $cidr, 'desc' => $matches[3]];
            }
        } elseif (preg_match('/^([0-9\.]+)\s*(.*)$/', $cleaned_line, $matches)) {
            // A bare address is a single host; digits that are not an address are ordinary text.
            if (filter_var($matches[1], FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) {
                $entry_data = ['ip' => $matches[1], 'cidr' => 32, 'desc' => $matches[2]];
            }
        }

        // IPv6 networks are recognised only to report and skip them.
        if ($entry_data === null && preg_match('/^([0-9a-fA-F:]+)\/\d+/', $cleaned_line, $matches)) {
            if (filter_var($matches[1], FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) {
                $errors[] = "Line {$display_line}: IPv6 address found and ignored: '$cleaned_line'.";
                $is_ip_entry = true;
            }
        }

        if ($entry_data !== null) {
            $is_ip_entry = true;
            $network_key = "{$entry_data['ip']}/{$entry_data['cidr']}";

            if (isset($seen_networks[$network_key])) {
                $errors[] = "Line {$display_line}: Duplicate entry in wiki file: '$network_key'. Skipping.";
            } else {
                $seen_networks[$network_key] = true;
                $full_description = trim($pending_description . ' ' . $entry_data['desc']);

                // Hosts marked as free/reserved are remembered as seen but not exported.
                $is_reserved_host = $entry_data['cidr'] === 32
                    && preg_match('/\b(frei|f r e i|reserve[d]?)\b/i', $full_description);

                if (!$is_reserved_host) {
                    $entries[] = [
                        'ip' => $entry_data['ip'],
                        'cidr' => $entry_data['cidr'],
                        'name' => $full_description,
                        'description' => $full_description,
                    ];
                }
            }
        }

        if ($is_ip_entry) {
            $pending_description = '';
        } else {
            // Plain text line: keep it for the next entry's description.
            $pending_description .= ($pending_description === '' ? '' : "\n") . $cleaned_line;
        }
    }

    return $entries;
}

/**
 * Generates the complete SQL script string, including additional parent blocks for better grouping.
 *
 * The script consists of a TRUNCATE of the target table, one INSERT per hard-coded top-level
 * parent network (parent id NULL), and one INSERT per parsed entry. Parsed entries are ordered
 * by prefix length (shortest first) and then by address, so that a containing network is
 * always inserted before the networks and hosts inside it. Each parsed entry resolves its
 * parent at execution time through a subquery picking the most specific row that already
 * contains its address.
 *
 * NOTE(review): TRUNCATE may be rejected if the table is referenced by a foreign key
 * (e.g. a self-reference on parent_network_id) — the schema is not visible here; confirm.
 *
 * @param array  $data      Entries as returned by parseWikiFile() (keys: ip, cidr, name, description).
 * @param string $tableName Name of the table the statements target.
 * @param array  &$errors   Receives a message for every entry skipped as a duplicate.
 * @return string The SQL script.
 */
function generateSqlScript(array $data, string $tableName, array &$errors): string
{
    $sql = "-- SQL Data for table '$tableName'\n";
    $sql .= "-- Automatically generated from the Wiki documentation (Version 7.0)\n\n";
    $sql .= "TRUNCATE TABLE `$tableName`;\n\n";
    $sql .= "-- Parent Networks (Top-Level)\n";

    // "ip/cidr" => true for every network an INSERT has already been emitted for.
    $inserted_networks = [];

    // Fixed top-level blocks: [address, prefix length, name, description].
    $parents = [
        // Private RFC1918 Ranges
        ['10.0.0.0', 8, 'Private Network Class A', 'RFC 1918 private address range for large networks.'],
        ['172.16.0.0', 12, 'Private Network Class B', 'RFC 1918 private address range for medium networks.'],
        ['192.168.0.0', 16, 'Private Network Class C', 'RFC 1918 private address range for small networks.'],

        // Public & Special Ranges
        ['100.64.0.0', 10, 'Carrier-Grade NAT (CGNAT)', 'RFC 6598 address range for Carrier-Grade NAT.'],
        ['5.206.200.0', 21, 'Public Network Block 5.206.200.0/21', 'Public IP address range.'],
        ['45.82.168.0', 22, 'Public Network Block 45.82.168.0/22', 'Public IP address range.'],
        ['46.151.200.0', 21, 'Public Network Block 46.151.200.0/21', 'Public IP address range.'],
        ['91.227.230.0', 22, 'Public Network Block 91.227.230.0/22', 'Public IP address range.'],
        ['91.227.236.0', 22, 'Public Network Block 91.227.236.0/22', 'Public IP address range.'],
        ['185.29.88.0', 22, 'Public Network Block 185.29.88.0/22', 'Public IP address range.'],
        ['192.254.252.0', 22, 'Public Network Block 192.254.252.0/22', 'Public IP address range.'],
        ['193.105.204.0', 22, 'Public Network Block 193.105.204.0/22', 'Public IP address range.'],
        ['193.186.244.0', 22, 'Public Network Block 193.186.244.0/22', 'Public IP address range.'],
        ['195.69.183.0', 24, 'Public Subnet 195.69.183.0/24', 'Public IP Subnet for KOLMI.'],
        ['195.191.252.0', 24, 'Public Subnet 195.191.252.0/24', 'Public IP Subnet for std Konzentrator.'],
    ];

    foreach ($parents as [$parentIp, $parentCidr, $parentName, $parentDescription]) {
        $sql .= generateInsertStatement($tableName, $parentIp, $parentCidr, 'NULL', 'active', $parentName, $parentDescription);
        $inserted_networks["{$parentIp}/{$parentCidr}"] = true;
    }

    // Sort data to ensure parent networks are inserted before their children.
    usort($data, static function (array $a, array $b): int {
        if ($a['cidr'] !== $b['cidr']) {
            return $a['cidr'] <=> $b['cidr'];
        }

        return ip2long($a['ip']) <=> ip2long($b['ip']);
    });

    $sql .= "\n-- Networks and Hosts from Wiki Documentation (Hierarchical)\n";

    foreach ($data as $network) {
        $network_key = "{$network['ip']}/{$network['cidr']}";

        if (isset($inserted_networks[$network_key])) {
            $errors[] = "Duplicate network detected (already exists as a top-level parent): '$network_key'. Skipping INSERT.";
            continue;
        }

        // Subquery to find the immediate parent network already in the table: the row with the
        // longest prefix that is still shorter than ours and whose range contains our address.
        // MySQL refuses to read the INSERT's own target table in a plain subquery (error 1093),
        // so the lookup is wrapped in a derived table; its LIMIT forces materialisation.
        $parentSelect = "(SELECT parent.id FROM (SELECT p.id FROM `$tableName` p WHERE " .
            "INET_ATON('{$network['ip']}') >= p.network_address AND " .
            "INET_ATON('{$network['ip']}') < (p.network_address + POWER(2, 32 - p.cidr)) AND " .
            "p.cidr < {$network['cidr']} " .
            "ORDER BY p.cidr DESC LIMIT 1) AS parent)";

        $sql .= generateInsertStatement($tableName, $network['ip'], $network['cidr'], $parentSelect, 'active', $network['name'], $network['description']);
        $inserted_networks[$network_key] = true;
    }

    return $sql;
}

/**
 * Generates a single SQL INSERT statement.
 *
 * The name is limited to 100 characters and, like the description, escaped with addslashes()
 * before being placed inside a single-quoted SQL string literal. All other arguments are
 * embedded verbatim, so callers must pass trusted values for them.
 *
 * @param string $tableName   Target table, emitted inside backticks.
 * @param string $ip          Dotted IPv4 address, emitted as INET_ATON('<ip>').
 * @param int    $cidr        Prefix length.
 * @param string $parentIdSql Raw SQL expression for parent_network_id (e.g. NULL or a subquery).
 * @param string $status      Status value, emitted inside single quotes without escaping.
 * @param string $name        Display name.
 * @param string $description Free-text description.
 * @return string The statement, terminated by ";\n".
 */
function generateInsertStatement(string $tableName, string $ip, int $cidr, string $parentIdSql, string $status, string $name, string $description): string
{
    // Truncate BEFORE escaping: cutting the escaped text could split an escape pair and leave a
    // trailing backslash that swallows the closing quote. mb_substr (when available) also keeps
    // multibyte characters intact.
    $shortName = function_exists('mb_substr')
        ? mb_substr($name, 0, 100, 'UTF-8')
        : substr($name, 0, 100);

    $name = addslashes($shortName);
    $description = addslashes($description);

    return "INSERT INTO `$tableName` (`network_address`, `cidr`, `parent_network_id`, `status`, `name`, `description`, `location`, `create`, `edit`) VALUES " .
        "(INET_ATON('$ip'), $cidr, $parentIdSql, '$status', '$name', '$description', NULL, UNIX_TIMESTAMP(), UNIX_TIMESTAMP());\n";
}

?>