$line) { // 1. Clean up the line from multiple inconsistencies $cleaned_line = html_entity_decode($line, ENT_QUOTES | ENT_HTML5); $cleaned_line = str_replace(['\\', ' '], ['', ' '], $cleaned_line); $cleaned_line = preg_replace('/\[([^\]]+)\]\(mailto:[^\)]+\)/', '$1', $cleaned_line); $cleaned_line = trim(preg_replace('/\s+/', ' ', $cleaned_line)); // 2. Check for section headers (START, END, etc.) if (str_starts_with($cleaned_line, '=====')) { $pending_description = ''; // Reset for any new section $ignore_section = (stripos($line, 'gelöschte IP Netze') !== false); // IMPROVEMENT: Specifically parse START headers for network definitions if (!$ignore_section && preg_match('/^===== START\s+([0-9\.]+)(?:[\/-](\d{1,2}))?\s*(.*?)(?:\.md)?\s*=====$/', $cleaned_line, $header_matches)) { $ip_from_header = $header_matches[1]; $cidr_from_header = $header_matches[2] ?? null; $desc_from_header = trim($header_matches[3]); // Heuristic: If CIDR is missing but IP ends in .0, assume it's a /24 network block. if ($cidr_from_header === null && preg_match('/\.0$/', $ip_from_header)) { $cidr_from_header = 24; if (empty($desc_from_header) || strtolower($desc_from_header) === 'linknetze') { $desc_from_header = "Network Block {$ip_from_header}"; } } // If a full network/CIDR is defined or inferred from the header, add it as a distinct entry. if ($cidr_from_header !== null && filter_var($ip_from_header, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) { $network_key = "$ip_from_header/$cidr_from_header"; if (!isset($seen_networks[$network_key])) { $entries[] = [ 'ip' => $ip_from_header, 'cidr' => (int)$cidr_from_header, 'name' => $desc_from_header, 'description' => $desc_from_header, ]; $seen_networks[$network_key] = true; } } } continue; // Header processed, move to next line } if ($ignore_section || empty($cleaned_line)) { if (empty($cleaned_line)) $pending_description = ''; continue; } // 3. 
Handle special HTML table data if (strpos($line, ']*>([\d\.]+)<\/td>/', $line, $ip_matches); if (!empty($ip_matches[1])) { foreach ($ip_matches[1] as $ip_from_table) { if (filter_var($ip_from_table, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) { $entries[] = [ 'ip' => $ip_from_table, 'cidr' => 32, 'name' => 'CGNAT Host', 'description' => 'CGNAT Host from table' ]; } } } $pending_description = ''; continue; } // 4. Try to match IP patterns $is_ip_entry = false; $entry_data = null; if (preg_match('/^([0-9\.]+)\/(\d+)\s*(.*)$/', $cleaned_line, $matches)) { $ip = $matches[1]; if (!filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) { $errors[] = "Line " . ($line_number + 1) . ": Invalid IPv4 address in network definition: '$ip'. Skipping."; $is_ip_entry = true; } else { $entry_data = ['ip' => $ip, 'cidr' => (int)$matches[2], 'desc' => $matches[3]]; } } elseif (preg_match('/^([0-9\.]+)\s*(.*)$/', $cleaned_line, $matches)) { $ip = $matches[1]; if (!filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4)) { $is_ip_entry = false; } else { $entry_data = ['ip' => $ip, 'cidr' => 32, 'desc' => $matches[2]]; } } if (!$entry_data && preg_match('/^([0-9a-fA-F:]+)\/\d+/', $cleaned_line, $matches)) { if (filter_var($matches[1], FILTER_VALIDATE_IP, FILTER_FLAG_IPV6)) { $errors[] = "Line " . ($line_number + 1) . ": IPv6 address found and ignored: '$cleaned_line'."; $is_ip_entry = true; } } if ($entry_data) { $is_ip_entry = true; $network_key = "{$entry_data['ip']}/{$entry_data['cidr']}"; if (isset($seen_networks[$network_key])) { $errors[] = "Line " . ($line_number + 1) . ": Duplicate entry in wiki file: '$network_key'. Skipping."; } else { $seen_networks[$network_key] = true; $full_description = trim($pending_description . ' ' . $entry_data['desc']); if ($entry_data['cidr'] === 32 && preg_match('/\b(frei|f r e i|reserve[d]?)\b/i', $full_description)) { // It is a reserved entry, do not add it to the list. 
} else {
                    $entries[] = [
                        'ip' => $entry_data['ip'],
                        'cidr' => $entry_data['cidr'],
                        'name' => $full_description,
                        'description' => $full_description,
                    ];
                }
            }
        }
        if ($is_ip_entry) {
            $pending_description = '';
        } else {
            $pending_description .= (empty($pending_description) ? '' : "\n") . $cleaned_line;
        }
    }
    return $entries;
}

/**
 * Generates the complete SQL script string, including additional parent blocks for better grouping.
 *
 * The script consists of, in order:
 *   1. a comment header and a TRUNCATE of the target table,
 *   2. INSERTs for a fixed list of top-level parent networks (parent id NULL),
 *   3. INSERTs for every valid entry of $data, ordered by prefix length and then by
 *      address, so that less specific networks are emitted before more specific ones.
 *
 * Each $data row resolves its parent when the script is executed: a subquery selects
 * the row with the longest prefix that is shorter than the entry's own prefix and
 * whose range contains the entry's address.
 *
 * Entries are skipped (with a message appended to $errors) when they are not a valid
 * IPv4 address with a prefix length of 0-32, when they collide with one of the fixed
 * parent networks, or when the same "ip/cidr" occurs more than once in $data.
 *
 * @param array  $data      Entries with the keys 'ip', 'cidr', 'name' and 'description'.
 * @param string $tableName Target table; interpolated into the SQL text unescaped.
 * @param array  $errors    Receives one human-readable message per skipped entry.
 * @return string The generated SQL script.
 */
function generateSqlScript(array $data, string $tableName, array &$errors): string
{
    $sql = "-- SQL Data for table '$tableName'\n";
    $sql .= "-- Automatically generated from the Wiki documentation (Version 7.0)\n\n";
    $sql .= "TRUNCATE TABLE `$tableName`;\n\n";
    $sql .= "-- Parent Networks (Top-Level)\n";

    // Fixed parent blocks that organize the network hierarchy: [address, prefix, name, description].
    $parents = [
        // Private RFC1918 Ranges
        ['10.0.0.0', 8, 'Private Network Class A', 'RFC 1918 private address range for large networks.'],
        ['172.16.0.0', 12, 'Private Network Class B', 'RFC 1918 private address range for medium networks.'],
        ['192.168.0.0', 16, 'Private Network Class C', 'RFC 1918 private address range for small networks.'],
        // Public & Special Ranges
        ['100.64.0.0', 10, 'Carrier-Grade NAT (CGNAT)', 'RFC 6598 address range for Carrier-Grade NAT.'],
        ['5.206.200.0', 21, 'Public Network Block 5.206.200.0/21', 'Public IP address range.'],
        ['45.82.168.0', 22, 'Public Network Block 45.82.168.0/22', 'Public IP address range.'],
        ['46.151.200.0', 21, 'Public Network Block 46.151.200.0/21', 'Public IP address range.'],
        ['91.227.230.0', 22, 'Public Network Block 91.227.230.0/22', 'Public IP address range.'],
        ['91.227.236.0', 22, 'Public Network Block 91.227.236.0/22', 'Public IP address range.'],
        ['185.29.88.0', 22, 'Public Network Block 185.29.88.0/22', 'Public IP address range.'],
        ['192.254.252.0', 22, 'Public Network Block 192.254.252.0/22', 'Public IP address range.'],
        ['193.105.204.0', 22, 'Public Network Block 193.105.204.0/22', 'Public IP address range.'],
        ['193.186.244.0', 22, 'Public Network Block 193.186.244.0/22', 'Public IP address range.'],
        ['195.69.183.0', 24, 'Public Subnet 195.69.183.0/24', 'Public IP Subnet for KOLMI.'],
        ['195.191.252.0', 24, 'Public Subnet 195.191.252.0/24', 'Public IP Subnet for std Konzentrator.'],
    ];

    $parentKeys = [];
    foreach ($parents as [$parentIp, $parentCidr, $parentName, $parentDescription]) {
        $sql .= generateInsertStatement($tableName, $parentIp, $parentCidr, 'NULL', 'active', $parentName, $parentDescription);
        $parentKeys["$parentIp/$parentCidr"] = true;
    }

    // Validate before anything is interpolated into SQL: the address and prefix end up
    // verbatim inside the parent-lookup subquery, and a prefix above 32 would make the
    // range arithmetic POWER(2, 32 - cidr) meaningless.
    $networks = [];
    foreach ($data as $network) {
        $ip = is_array($network) ? ($network['ip'] ?? null) : null;
        $cidr = is_array($network) ? ($network['cidr'] ?? null) : null;

        $isIpv4 = is_string($ip) && filter_var($ip, FILTER_VALIDATE_IP, FILTER_FLAG_IPV4) !== false;
        $prefix = filter_var($cidr, FILTER_VALIDATE_INT, ['options' => ['min_range' => 0, 'max_range' => 32]]);

        if (!$isIpv4 || $prefix === false) {
            $label = (is_scalar($ip) ? (string) $ip : '?') . '/' . (is_scalar($cidr) ? (string) $cidr : '?');
            $errors[] = "Invalid network entry (expected an IPv4 address and a prefix length between 0 and 32): '$label'. Skipping INSERT.";
            continue;
        }

        $networks[] = [
            'ip' => $ip,
            'cidr' => $prefix,
            'name' => (string) ($network['name'] ?? ''),
            'description' => (string) ($network['description'] ?? ''),
        ];
    }

    // Sort data to ensure parent networks are inserted before their children:
    // shorter prefixes first, ties broken by numeric address.
    usort(
        $networks,
        static fn (array $a, array $b): int => [$a['cidr'], ip2long($a['ip'])] <=> [$b['cidr'], ip2long($b['ip'])]
    );

    $sql .= "\n-- Networks and Hosts from Wiki Documentation (Hierarchical)\n";

    $emittedKeys = [];
    foreach ($networks as $network) {
        $network_key = "{$network['ip']}/{$network['cidr']}";

        if (isset($parentKeys[$network_key])) {
            $errors[] = "Duplicate network detected (already exists as a top-level parent): '$network_key'. Skipping INSERT.";
            continue;
        }
        if (isset($emittedKeys[$network_key])) {
            // Same network listed twice in $data; the first occurrence wins.
            $errors[] = "Duplicate network detected (listed more than once in the input data): '$network_key'. Skipping INSERT.";
            continue;
        }

        // Subquery to find the immediate parent network already in the table.
        // NOTE(review): the subquery reads the table the INSERT writes to; some MySQL
        // versions reject that with error 1093 - verify against the target server.
        $parentSelect = "(SELECT id FROM `$tableName` p WHERE " .
            "INET_ATON('{$network['ip']}') >= p.network_address AND " .
            "INET_ATON('{$network['ip']}') < (p.network_address + POWER(2, 32 - p.cidr)) AND " .
            "p.cidr < {$network['cidr']} " .
            "ORDER BY p.cidr DESC LIMIT 1)";

        $sql .= generateInsertStatement($tableName, $network['ip'], $network['cidr'], $parentSelect, 'active', $network['name'], $network['description']);
        $emittedKeys[$network_key] = true;
    }

    return $sql;
}

/**
 * Generates a single SQL INSERT statement.
/**
 * Builds one INSERT statement (terminated by ";\n") for a single network row.
 *
 * The name is cut to at most 100 characters and only then escaped. Escaping first and
 * truncating afterwards could cut an escape sequence in half and leave a lone
 * backslash in front of the closing quote, corrupting the statement.
 *
 * $tableName, $ip, $status and $parentIdSql are inserted into the statement verbatim;
 * $parentIdSql is expected to be a ready-made SQL expression such as NULL or a subquery.
 *
 * @param string $tableName   Target table name (placed between backticks).
 * @param string $ip          Dotted IPv4 address, wrapped in INET_ATON('...').
 * @param int    $cidr        Prefix length.
 * @param string $parentIdSql SQL expression yielding the parent row id.
 * @param string $status      Value for the `status` column.
 * @param string $name        Value for the `name` column; truncated to 100 characters.
 * @param string $description Value for the `description` column.
 * @return string The INSERT statement.
 */
function generateInsertStatement(
    string $tableName,
    string $ip,
    int $cidr,
    string $parentIdSql,
    string $status,
    string $name,
    string $description
): string {
    $maxNameLength = 100;

    // Truncate by characters, not bytes, so a multi-byte UTF-8 character is never split.
    // Falls back to byte-wise truncation when the mbstring extension is unavailable.
    $name = function_exists('mb_substr')
        ? mb_substr($name, 0, $maxNameLength, 'UTF-8')
        : substr($name, 0, $maxNameLength);

    // Escape only after truncation so every escape sequence stays intact.
    $escapedName = addslashes($name);
    $escapedDescription = addslashes($description);

    return "INSERT INTO `$tableName` (`network_address`, `cidr`, `parent_network_id`, `status`, `name`, `description`, `location`, `create`, `edit`) VALUES "
        . "(INET_ATON('$ip'), $cidr, $parentIdSql, '$status', '$escapedName', '$escapedDescription', NULL, UNIX_TIMESTAMP(), UNIX_TIMESTAMP());\n";
}