#!/bin/bash
###############################################################################
# code::core — DNS Analytics Server
# Automated Installation & Setup
# OS: Ubuntu 24.04 LTS
# Target: Single-VM deployment (50K queries/day)
###############################################################################

# -e: abort on any unhandled command failure; -u: error on unset variables
# (every variable in this script is assigned before use).
# NOTE: the original file started with a UTF-8 BOM before '#!', which stops
# the kernel from recognizing the shebang line; it has been removed.
set -eu

# ANSI color codes for terminal output
RED='\033[0;31m'
GREEN='\033[0;32m'
BLUE='\033[0;34m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color

# Deployment configuration — every value can be overridden via the environment.
# ':=' assigns the default when the variable is unset or empty, identical in
# effect to VAR="${VAR:-default}".
: "${BIND_IP:=192.168.234.128}"
BIND_PORT="53"
: "${DB_USER:=dns_user}"
: "${DB_NAME:=dns_analytics}"
: "${DB_PASSWORD:=ChangeThisPassword123!}"
INGESTOR_DIR="/opt/dns-ingestor"
LOG_DIR="/var/log/dns-ingestor"

# Banner
printf '%b\n' \
    "${BLUE}================================${NC}" \
    "${BLUE}code::core - DNS Analytics Setup${NC}" \
    "${BLUE}================================${NC}" ""

# Installation writes to /etc, systemd and package state — root is mandatory.
if (( EUID != 0 )); then
   echo -e "${RED}Error: This script must be run as root${NC}"
   exit 1
fi

# Status-line helpers: green check / yellow bang / red cross prefix.
# printf '%b' interprets backslash escapes exactly as `echo -e` did.
log_info() {
    printf '%b\n' "${GREEN}[✓]${NC} $1"
}

log_warn() {
    printf '%b\n' "${YELLOW}[!]${NC} $1"
}

log_error() {
    printf '%b\n' "${RED}[✗]${NC} $1"
}

###############################################################################
# 1. SYSTEM UPDATES
###############################################################################
echo -e "\n${BLUE}STEP 1: System Updates${NC}"
apt-get update
apt-get install -y curl wget git build-essential
log_info "System updated"

###############################################################################
# 2. INSTALL BIND9
###############################################################################
echo -e "\n${BLUE}STEP 2: Installing BIND9${NC}"
apt-get install -y bind9 bind9utils bind9-doc dnsutils

# Query-log directory owned by the bind daemon user.
# install -d performs mkdir -p + chown + chmod in a single step.
install -d -m 755 -o bind -g bind /var/log/named

log_info "BIND9 installed"

# Keep a pristine copy of the stock options file before we overwrite it.
if [ -f "/etc/bind/named.conf.options" ]; then
    cp -- /etc/bind/named.conf.options /etc/bind/named.conf.options.bak
fi

# Write the main BIND options: restrict queries/recursion to the RFC1918
# "trusted" ranges, forward everything upstream (Google/Cloudflare/Quad9),
# rate-limit responses and bound the cache. The heredoc delimiter is quoted
# ('EOF'), so the content is written verbatim with no shell expansion.
cat > /etc/bind/named.conf.options << 'EOF'
// DNS Analytics Server -- code::core
acl "trusted" {
    127.0.0.1;
    192.168.0.0/16;
    10.0.0.0/8;
    172.16.0.0/12;
};

options {
    directory "/var/cache/bind";
    listen-on { any; };
    listen-on-v6 { any; };
    allow-query { trusted; };
    allow-recursion { trusted; };
    forwarders { 8.8.8.8; 8.8.4.4; 1.1.1.1; 9.9.9.9; };
    forward only;
    dnssec-validation auto;
    allow-transfer { none; };
    notify no;
    recursion yes;
    max-cache-size 256m;
    min-cache-ttl 60;
    max-cache-ttl 86400;
    rate-limit { responses-per-second 50; window 5; };
    pid-file "/run/named/named.pid";
};
EOF

# Write the logging configuration into named.conf.local (on a fresh install
# the stock file holds only commented examples, so overwriting is safe —
# NOTE(review): confirm no custom zones live here before re-running).
# Directing the "queries" category to a file channel enables query logging;
# noisy categories are discarded via the built-in null channel.
cat > /etc/bind/named.conf.local << 'EOF'
logging {
    channel queries_log {
        file "/var/log/named/queries.log" versions 7 size 500m;
        severity dynamic;
        print-time yes;
        print-severity no;
        print-category no;
    };

    channel named_log {
        file "/var/log/named/named.log" versions 4 size 50m;
        severity info;
        print-time yes;
        print-severity yes;
        print-category yes;
    };

    category queries { queries_log; };
    category query-errors { queries_log; };
    category default { named_log; };
    category general { named_log; };
    category config { named_log; };
    category network { named_log; };
    category security { named_log; };
    category lame-servers { null; };
    category dnssec { null; };
    category resolver { null; };
    category cname { null; };
    category xfer-in { null; };
    category xfer-out { null; };
    category notify { null; };
    category client { null; };
    category unmatched { null; };
    category dispatch { null; };
    category edns-disabled { null; };
    category rpz { null; };
    category rate-limit { null; };
};
EOF

# Validate the configuration before (re)starting the daemon. Run the checker
# directly — the script already enforces root, so the original `sudo` was
# redundant — and let its diagnostics reach the operator instead of
# discarding them with 2>/dev/null (the error path tells the user to "fix
# manually", which is impossible without seeing the errors).
if named-checkconf; then
    log_info "BIND9 configuration valid"
else
    log_error "BIND9 configuration has errors. Please fix manually."
    exit 1
fi

# Enable and restart BIND9 (on Ubuntu, 'bind9' is an alias of named.service).
systemctl enable bind9
systemctl restart bind9
log_info "BIND9 started and enabled"

# Smoke test: give named a moment to come up, then resolve through it.
sleep 2
if dig @127.0.0.1 google.com A +short > /dev/null 2>&1; then
    log_info "BIND9 query test passed"
else
    log_error "BIND9 query test failed"
fi

###############################################################################
# 3. INSTALL POSTGRESQL
###############################################################################
echo -e "\n${BLUE}STEP 3: Installing PostgreSQL${NC}"
apt-get install -y postgresql postgresql-contrib

systemctl enable postgresql
systemctl restart postgresql

log_info "PostgreSQL installed"

# Create role and database idempotently. The original unconditional
# CREATE USER / CREATE DATABASE failed silently on re-runs (psql exits 0
# without ON_ERROR_STOP); here we probe pg_roles/pg_database first and make
# real SQL errors abort the installer.
# NOTE(review): $DB_PASSWORD is interpolated into SQL — a password containing
# a single quote would break the statement; keep it quote-free.
if ! sudo -u postgres psql -tAc "SELECT 1 FROM pg_roles WHERE rolname='$DB_USER'" | grep -q 1; then
    sudo -u postgres psql -v ON_ERROR_STOP=1 -c \
        "CREATE USER $DB_USER WITH PASSWORD '$DB_PASSWORD' NOSUPERUSER NOCREATEDB NOCREATEROLE LOGIN;"
fi

if ! sudo -u postgres psql -tAc "SELECT 1 FROM pg_database WHERE datname='$DB_NAME'" | grep -q 1; then
    sudo -u postgres psql -v ON_ERROR_STOP=1 -c \
        "CREATE DATABASE $DB_NAME OWNER $DB_USER ENCODING 'UTF8' LC_COLLATE 'en_US.UTF-8' LC_CTYPE 'en_US.UTF-8' TEMPLATE template0;"
    sudo -u postgres psql -v ON_ERROR_STOP=1 -c \
        "GRANT ALL PRIVILEGES ON DATABASE $DB_NAME TO $DB_USER;"
fi

log_info "PostgreSQL user and database created"

# Create the analytics schema. Fixes relative to the original:
#   * PostgreSQL partition bounds must be literal constants — the original
#     "FOR VALUES FROM (CURRENT_DATE) ..." is rejected by the server, and
#     because psql (without ON_ERROR_STOP) still exited 0, dns_queries was
#     left with NO partition, so every ingestor INSERT failed silently.
#     A DEFAULT partition guarantees inserts always land somewhere; dated
#     partitions can be attached later by a maintenance job.
#   * -v ON_ERROR_STOP=1 so any schema error aborts the installer.
#   * IF NOT EXISTS / ON CONFLICT guards make the script safely re-runnable.
#   * Stray trailing COMMIT (there was no BEGIN) removed.
sudo -u postgres psql -v ON_ERROR_STOP=1 -d "$DB_NAME" << 'EOF'
-- Raw query log, partitioned by day
CREATE TABLE IF NOT EXISTS dns_queries (
    id BIGSERIAL,
    queried_at TIMESTAMPTZ NOT NULL,
    client_ip INET NOT NULL,
    client_port INTEGER,
    domain TEXT NOT NULL,
    apex_domain TEXT NOT NULL,
    qtype VARCHAR(10) NOT NULL,
    flags TEXT,
    response_rcode TEXT,
    cached BOOLEAN DEFAULT FALSE,
    inserted_at TIMESTAMPTZ DEFAULT now()
) PARTITION BY RANGE (queried_at);

-- Catch-all partition: partition bounds cannot be expressions such as
-- CURRENT_DATE, and without any partition every INSERT errors out.
CREATE TABLE IF NOT EXISTS dns_queries_default PARTITION OF dns_queries DEFAULT;

-- Per-domain count per day
CREATE TABLE IF NOT EXISTS dns_daily_stats (
    stat_date DATE NOT NULL,
    domain TEXT NOT NULL,
    apex_domain TEXT NOT NULL,
    qtype VARCHAR(10) NOT NULL,
    hit_count BIGINT NOT NULL DEFAULT 0,
    unique_clients INTEGER NOT NULL DEFAULT 0,
    nxdomain_count INTEGER NOT NULL DEFAULT 0,
    servfail_count INTEGER NOT NULL DEFAULT 0,
    first_seen TIMESTAMPTZ,
    last_seen TIMESTAMPTZ,
    PRIMARY KEY (stat_date, domain, qtype)
);

-- Weekly aggregates
CREATE TABLE IF NOT EXISTS dns_weekly_stats (
    week_start DATE NOT NULL,
    week_end DATE NOT NULL,
    apex_domain TEXT NOT NULL,
    total_queries BIGINT NOT NULL DEFAULT 0,
    unique_clients INTEGER NOT NULL DEFAULT 0,
    peak_day DATE,
    peak_day_count BIGINT,
    category TEXT,
    PRIMARY KEY (week_start, apex_domain)
);

-- Monthly aggregates
CREATE TABLE IF NOT EXISTS dns_monthly_stats (
    year_month CHAR(7) NOT NULL,
    apex_domain TEXT NOT NULL,
    total_queries BIGINT NOT NULL DEFAULT 0,
    unique_clients INTEGER NOT NULL DEFAULT 0,
    avg_daily NUMERIC(12,2),
    rank_in_month INTEGER,
    category TEXT,
    PRIMARY KEY (year_month, apex_domain)
);

-- Hourly heatmap
CREATE TABLE IF NOT EXISTS dns_hourly_heatmap (
    stat_date DATE NOT NULL,
    hour_of_day SMALLINT NOT NULL CHECK (hour_of_day BETWEEN 0 AND 23),
    apex_domain TEXT NOT NULL,
    query_count BIGINT NOT NULL DEFAULT 0,
    PRIMARY KEY (stat_date, hour_of_day, apex_domain)
);

-- Domain categorization
CREATE TABLE IF NOT EXISTS domain_categories (
    apex_domain TEXT PRIMARY KEY,
    category TEXT NOT NULL,
    provider TEXT,
    cache_priority SMALLINT DEFAULT 5,
    tagged_at TIMESTAMPTZ DEFAULT now()
);

-- Seed domain categories (no-op when already seeded)
INSERT INTO domain_categories VALUES
('google.com','search','Google',3,now()),
('googleapis.com','cdn','Google',2,now()),
('youtube.com','streaming','Google',1,now()),
('googlevideo.com','streaming','Google',1,now()),
('netflix.com','streaming','Netflix',1,now()),
('nflxvideo.net','streaming','Netflix',1,now()),
('nflximg.net','cdn','Netflix',2,now()),
('akamai.net','cdn','Akamai',1,now()),
('akamaiedge.net','cdn','Akamai',1,now()),
('cloudflare.com','cdn','Cloudflare',2,now()),
('facebook.com','social','Meta',4,now()),
('instagram.com','social','Meta',4,now()),
('whatsapp.net','messaging','Meta',3,now()),
('whatsapp.com','messaging','Meta',3,now()),
('twitter.com','social','X',4,now()),
('twimg.com','cdn','X',2,now()),
('tiktok.com','social','TikTok',4,now()),
('microsoft.com','work','Microsoft',5,now()),
('office.com','work','Microsoft',5,now()),
('windowsupdate.com','updates','Microsoft',8,now()),
('ubuntu.com','updates','Canonical',8,now()),
('amazonaws.com','cloud','AWS',5,now()),
('fastly.net','cdn','Fastly',2,now()),
('cloudfront.net','cdn','AWS',2,now()),
('doubleclick.net','ads','Google',9,now()),
('googlesyndication.com','ads','Google',9,now())
ON CONFLICT (apex_domain) DO NOTHING;

-- Create indexes
CREATE INDEX IF NOT EXISTS idx_q_time ON dns_queries (queried_at DESC);
CREATE INDEX IF NOT EXISTS idx_q_apex ON dns_queries (apex_domain, queried_at DESC);
CREATE INDEX IF NOT EXISTS idx_q_client ON dns_queries (client_ip, queried_at DESC);
CREATE INDEX IF NOT EXISTS idx_d_apex ON dns_daily_stats (apex_domain, stat_date DESC);
CREATE INDEX IF NOT EXISTS idx_d_hits ON dns_daily_stats (stat_date, hit_count DESC);
CREATE INDEX IF NOT EXISTS idx_w_domain ON dns_weekly_stats (apex_domain, week_start DESC);
CREATE INDEX IF NOT EXISTS idx_m_domain ON dns_monthly_stats (apex_domain, year_month DESC);
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE INDEX IF NOT EXISTS idx_d_trgm ON dns_daily_stats USING gin (apex_domain gin_trgm_ops);
EOF

log_info "PostgreSQL schema created"

###############################################################################
# 4. INSTALL PYTHON & INGESTOR
###############################################################################
echo -e "\n${BLUE}STEP 4: Installing Python Ingestor${NC}"
apt-get install -y python3 python3-pip python3-venv

# Quote path expansions (the paths are constants today, but unquoted
# expansions are a latent word-splitting bug).
mkdir -p "$INGESTOR_DIR" "$LOG_DIR"
chown -R root:root "$INGESTOR_DIR"

# Dedicated virtualenv so psycopg2 stays out of system site-packages.
# Call the venv's pip directly instead of `source activate`, which would
# leak the venv PATH into the rest of this installer.
cd "$INGESTOR_DIR"
python3 -m venv venv
venv/bin/pip install --upgrade pip
venv/bin/pip install psycopg2-binary

log_info "Python environment created"

# Write the ingestor daemon; quoted delimiter -> no shell expansion inside.
cat > "$INGESTOR_DIR"/ingestor.py << 'EOF'
#!/usr/bin/env python3
"""Tail the BIND9 query log and bulk-insert parsed queries into PostgreSQL."""
import re, time, logging, os, signal, sys
from datetime import datetime
import psycopg2
from psycopg2.extras import execute_values

# Database connection settings; overridable via environment (the systemd
# unit injects DB_NAME/DB_USER/DB_PASSWORD).
DB_CONFIG = {
    'dbname': os.environ.get('DB_NAME', 'dns_analytics'),
    'user': os.environ.get('DB_USER', 'dns_user'),
    'password': os.environ.get('DB_PASSWORD', 'ChangeThisPassword123!'),
    'host': '127.0.0.1',
    'port': 5432
}

LOG_FILE = '/var/log/named/queries.log'  # BIND9 "queries_log" channel target
# Directory for the ingestor's own log output. FIX: this constant was
# referenced in __main__ but never defined, crashing the daemon at startup
# with NameError.
LOG_DIR = os.environ.get('LOG_DIR', '/var/log/dns-ingestor')
BATCH_SIZE = 100    # flush after this many parsed records ...
FLUSH_INTERVAL = 5  # ... or after this many seconds, whichever comes first

# Regex to parse BIND9 query-log lines such as:
#   12-Jan-2025 10:00:00.123 client @0x7f.. 192.168.1.5#53214 (example.com): query: example.com IN A + (192.168.1.1)
QUERY_RE = re.compile(
    r'^(?P<ts>\d{2}-\w{3}-\d{4} \d{2}:\d{2}:\d{2})\.\d+'
    r'\s+client\s+(?:@\S+\s+)?(?P<ip>[\d\.a-fA-F:]+)#(?P<port>\d+)'
    r'\s+\([^)]+\):\s+query:\s+(?P<domain>[\w.\-]+)\s+IN\s+(?P<qtype>\w+)'
    r'\s+(?P<flags>[+\-\w\s]*?)(?:\s+\([\d.]+\))?$'
)

def get_apex_domain(domain):
    """Reduce a domain to its final two labels (mail.google.com -> google.com).

    NOTE(review): two-label truncation misclassifies multi-label public
    suffixes (example.co.uk -> co.uk); a Public Suffix List lookup would be
    required for full correctness.
    """
    labels = domain.rstrip('.').split('.')
    if len(labels) < 2:
        return domain
    return '.'.join(labels[-2:])

def parse_timestamp(ts_str):
    """Parse a BIND9 timestamp ('12-Jan-2025 10:00:00') into a naive datetime.

    Falls back to the current UTC time on malformed input so one bad log
    line cannot kill the ingest loop. FIX: catch only (ValueError, TypeError)
    — the original bare `except:` also swallowed SystemExit and
    KeyboardInterrupt.
    """
    try:
        return datetime.strptime(ts_str, '%d-%b-%Y %H:%M:%S')
    except (ValueError, TypeError):
        # Naive UTC to stay consistent with the naive strptime result above.
        return datetime.utcnow()

def parse_query_line(line):
    """Parse one BIND9 query-log line into a column dict, or None.

    Returns None for blank lines and for lines that do not match QUERY_RE
    (i.e. non-query log records).
    """
    text = line.strip()
    if not text:
        return None

    m = QUERY_RE.match(text)
    if m is None:
        return None

    fields = m.groupdict()
    name = fields.get('domain', '').lower().rstrip('.')
    if not name:
        return None

    flag_str = fields.get('flags', '').strip()

    return {
        'queried_at': parse_timestamp(fields['ts']),
        'client_ip': fields.get('ip', '0.0.0.0'),
        'client_port': int(fields.get('port', 0)),
        'domain': name,
        'apex_domain': get_apex_domain(name),
        'qtype': fields.get('qtype', 'A').upper(),
        'flags': flag_str,
        # Query logs carry no response code — NOERROR is a placeholder.
        # NOTE(review): 'cached' is inferred from a 'CL' flag substring;
        # confirm against the BIND version's flag format.
        'response_rcode': 'NOERROR',
        'cached': 'CL' in flag_str
    }

def connect_db():
    """Open a PostgreSQL connection, retrying every 10s until it succeeds.

    Blocks indefinitely while the database is unreachable; systemd ordering
    normally means PostgreSQL is already up when this runs.
    """
    while True:
        try:
            connection = psycopg2.connect(**DB_CONFIG)
        except psycopg2.OperationalError as exc:
            logging.error(f'DB connection failed: {exc}')
            time.sleep(10)
        else:
            # Explicit transactions: flush_batch() commits/rolls back itself.
            connection.autocommit = False
            logging.info('PostgreSQL connected')
            return connection

def flush_batch(conn, batch):
    """Bulk-insert a batch of parsed query dicts; return rows flushed.

    On a database error the transaction is rolled back and 0 is returned,
    so the caller simply drops the batch and keeps tailing.
    """
    if not batch:
        return 0

    sql = (
        'INSERT INTO dns_queries '
        '(queried_at, client_ip, client_port, domain, apex_domain, '
        'qtype, flags, response_rcode, cached) VALUES %s '
        'ON CONFLICT DO NOTHING'
    )

    # Column order must match the INSERT column list above.
    columns = ('queried_at', 'client_ip', 'client_port', 'domain',
               'apex_domain', 'qtype', 'flags', 'response_rcode', 'cached')
    rows = [tuple(rec[col] for col in columns) for rec in batch]

    try:
        with conn.cursor() as cur:
            execute_values(cur, sql, rows, page_size=500)
        conn.commit()
        return len(batch)
    except psycopg2.Error as exc:
        logging.error(f'Insert failed: {exc}')
        conn.rollback()
        return 0

def run_ingestor(conn):
    """Tail LOG_FILE forever, batching parsed lines into PostgreSQL.

    Rotation is detected by inode change: BIND's 'versions' rotation renames
    the old file and creates a new one, so a new inode appears and we reopen
    and seek to the end. NOTE(review): copy-truncate style rotation (same
    inode, file shrinks) is NOT detected and would leave a stale offset —
    confirm only rename-style rotation is in use.
    """
    batch = []
    last_flush = time.time()
    total_inserted = 0
    inode = 0  # 0 = nothing opened yet; real inode numbers are non-zero
    log_file = None

    logging.info(f'Watching {LOG_FILE}')

    while True:
        try:
            current_inode = os.stat(LOG_FILE).st_ino
        except FileNotFoundError:
            # Log not created yet (or mid-rotation): retry shortly.
            time.sleep(5)
            continue

        # File rotated (or first iteration): reopen and tail from the end.
        if current_inode != inode:
            try:
                if log_file:
                    log_file.close()
                log_file = open(LOG_FILE, 'r')
                log_file.seek(0, 2)  # Seek to end
                inode = current_inode
            except IOError:
                time.sleep(5)
                continue

        # Read new lines; readline() returns '' at EOF without blocking.
        while True:
            line = log_file.readline()
            if not line:
                break

            record = parse_query_line(line)
            if record:
                batch.append(record)

        # Flush if batch size or time threshold reached
        now = time.time()
        if len(batch) >= BATCH_SIZE or (batch and now - last_flush >= FLUSH_INTERVAL):
            inserted = flush_batch(conn, batch)
            total_inserted += inserted
            if inserted > 0:
                logging.info(f'Flushed {inserted} records | Total: {total_inserted:,}')
            batch = []  # batch is dropped even when the insert failed
            last_flush = now

        time.sleep(0.5)

if __name__ == '__main__':
    # Resolve the ingestor's own log directory. FIX: the original referenced
    # a module-level LOG_DIR constant that was never defined, so the daemon
    # crashed at startup with NameError.
    log_dir = os.environ.get('LOG_DIR', '/var/log/dns-ingestor')
    os.makedirs(log_dir, exist_ok=True)
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s',
        handlers=[
            logging.StreamHandler(sys.stdout),
            logging.FileHandler(f'{log_dir}/ingestor.log')
        ]
    )

    # Exit cleanly on SIGTERM (systemd stop) and SIGINT (Ctrl-C).
    def shutdown(sig, frame):
        logging.info('Shutting down...')
        sys.exit(0)

    signal.signal(signal.SIGTERM, shutdown)
    signal.signal(signal.SIGINT, shutdown)

    # Connect (blocking, with retry) and run the tail loop forever.
    run_ingestor(connect_db())
EOF

# Quote the expansion (latent word-splitting bug in the original).
chmod +x "$INGESTOR_DIR/ingestor.py"
log_info "Ingestor script created"

###############################################################################
# 5. CREATE SYSTEMD SERVICE
###############################################################################
echo -e "\n${BLUE}STEP 5: Setting up Systemd Service${NC}"

# Unquoted delimiter on purpose: $INGESTOR_DIR / $DB_* expand into the unit.
cat > /etc/systemd/system/dns-ingestor.service << EOF
[Unit]
Description=DNS Query Log Ingestor -- code::core
After=network.target postgresql.service bind9.service
Requires=postgresql.service

[Service]
Type=simple
User=root
WorkingDirectory=$INGESTOR_DIR
Environment="DB_NAME=$DB_NAME"
Environment="DB_USER=$DB_USER"
Environment="DB_PASSWORD=$DB_PASSWORD"
ExecStart=$INGESTOR_DIR/venv/bin/python3 $INGESTOR_DIR/ingestor.py
Restart=always
RestartSec=10s
LimitNOFILE=65536

[Install]
WantedBy=multi-user.target
EOF

# FIX: the unit embeds the DB password; the default 0644 mode would expose
# it to every local user. systemd runs as root and can still read 0600.
chmod 600 /etc/systemd/system/dns-ingestor.service

systemctl daemon-reload
systemctl enable dns-ingestor
systemctl restart dns-ingestor

log_info "Systemd service created and started"

# Give the service a moment to settle, then confirm it stayed up; dump the
# most recent journal lines on failure to aid debugging.
sleep 3
if ! systemctl is-active --quiet dns-ingestor; then
    log_error "Ingestor service failed to start"
    journalctl -u dns-ingestor -n 20
else
    log_info "Ingestor service is running"
fi

###############################################################################
# 6. CREATE CRON JOBS FOR ROLLUPS
###############################################################################
echo -e "\n${BLUE}STEP 6: Setting up Cron Rollups${NC}"

mkdir -p $INGESTOR_DIR/sql

# Nightly rollup SQL: aggregates yesterday's rows (Africa/Kampala local day)
# from dns_queries into dns_daily_stats and the hourly heatmap. The upserts
# are keyed on each table's primary key, so re-runs are idempotent.
# NOTE(review): the trailing COMMIT runs outside any BEGIN (psql autocommits
# per statement) and only produces a warning — confirm whether a single
# transaction (psql -1) was intended.
cat > $INGESTOR_DIR/sql/rollup_daily.sql << 'EOF'
INSERT INTO dns_daily_stats
    (stat_date, domain, apex_domain, qtype, hit_count, unique_clients,
     nxdomain_count, servfail_count, first_seen, last_seen)
SELECT
    DATE(queried_at AT TIME ZONE 'Africa/Kampala') AS stat_date,
    domain, apex_domain, qtype,
    COUNT(*), COUNT(DISTINCT client_ip),
    COUNT(*) FILTER (WHERE response_rcode='NXDOMAIN'),
    COUNT(*) FILTER (WHERE response_rcode='SERVFAIL'),
    MIN(queried_at), MAX(queried_at)
FROM dns_queries
WHERE queried_at >= (CURRENT_DATE-INTERVAL '1 day') AT TIME ZONE 'Africa/Kampala'
  AND queried_at  < CURRENT_DATE AT TIME ZONE 'Africa/Kampala'
GROUP BY 1, domain, apex_domain, qtype
ON CONFLICT (stat_date, domain, qtype) DO UPDATE SET
    hit_count=EXCLUDED.hit_count,
    unique_clients=EXCLUDED.unique_clients,
    nxdomain_count=EXCLUDED.nxdomain_count,
    servfail_count=EXCLUDED.servfail_count,
    first_seen=EXCLUDED.first_seen,
    last_seen=EXCLUDED.last_seen;

INSERT INTO dns_hourly_heatmap (stat_date, hour_of_day, apex_domain, query_count)
SELECT DATE(queried_at AT TIME ZONE 'Africa/Kampala'),
    EXTRACT(HOUR FROM queried_at AT TIME ZONE 'Africa/Kampala')::SMALLINT,
    apex_domain, COUNT(*)
FROM dns_queries
WHERE queried_at >= (CURRENT_DATE-INTERVAL '1 day') AT TIME ZONE 'Africa/Kampala'
  AND queried_at  < CURRENT_DATE AT TIME ZONE 'Africa/Kampala'
GROUP BY 1, 2, 3
ON CONFLICT (stat_date, hour_of_day, apex_domain) DO UPDATE SET query_count=EXCLUDED.query_count;

COMMIT;
EOF

# FIX: non-interactive psql from cron needs credentials — the original cron
# line used '-h localhost' with no password source, so every nightly run
# failed at authentication. Store them in root's ~/.pgpass, which libpq
# reads automatically (and which MUST be mode 0600).
PGPASS_LINE="localhost:5432:$DB_NAME:$DB_USER:$DB_PASSWORD"
touch /root/.pgpass
grep -qxF "$PGPASS_LINE" /root/.pgpass || echo "$PGPASS_LINE" >> /root/.pgpass
chmod 600 /root/.pgpass

# Install the 01:00 rollup job, replacing any previous copy so repeated
# installer runs do not stack duplicate crontab entries.
CRON_CMD="0 1 * * * psql -U $DB_USER -d $DB_NAME -h localhost -f $INGESTOR_DIR/sql/rollup_daily.sql >> $LOG_DIR/daily.log 2>&1"
{ crontab -l 2>/dev/null | grep -vF "rollup_daily.sql" || true; echo "$CRON_CMD"; } | crontab -

log_info "Cron jobs configured"

###############################################################################
# 7. FIREWALL SETUP
###############################################################################
echo -e "\n${BLUE}STEP 7: Configuring Firewall${NC}"

apt-get install -y ufw

# FIX: add every allow rule BEFORE enabling the firewall. The original ran
# 'ufw --force enable' first; with ufw's default deny-incoming policy that
# can cut off the operator's active SSH session mid-install.
ufw allow 22/tcp  # SSH
ufw allow from 192.168.0.0/16 to any port 53 proto udp
ufw allow from 192.168.0.0/16 to any port 53 proto tcp
ufw allow from 10.0.0.0/8 to any port 53 proto udp
ufw allow from 10.0.0.0/8 to any port 53 proto tcp
ufw allow from 172.16.0.0/12 to any port 53 proto udp
ufw allow from 172.16.0.0/12 to any port 53 proto tcp
ufw allow from 127.0.0.1 to any port 5432 proto tcp
ufw --force enable

log_info "Firewall configured"

###############################################################################
# 8. FINAL VERIFICATION
###############################################################################
echo -e "\n${BLUE}STEP 8: Verification Tests${NC}"

echo -e "\n${YELLOW}Testing BIND9...${NC}"
if dig @127.0.0.1 google.com A +short > /dev/null 2>&1; then
    log_info "BIND9 responding to queries"
else
    log_error "BIND9 not responding"
fi

echo -e "\n${YELLOW}Testing PostgreSQL...${NC}"
# FIX: compare an actual row count. The original 'grep -q 26' also matched
# 126, 260, ... and broke whenever the seed list changed size.
CATEGORY_COUNT=$(sudo -u postgres psql -d "$DB_NAME" -tAc "SELECT COUNT(*) FROM domain_categories;" 2>/dev/null || echo 0)
if [ "$CATEGORY_COUNT" -gt 0 ] 2>/dev/null; then
    log_info "PostgreSQL contains domain categories"
else
    log_error "PostgreSQL connection failed"
fi

echo -e "\n${YELLOW}Testing Ingestor...${NC}"
# systemd is authoritative for service state; pgrep -f matched any process
# whose cmdline merely contained the string "dns-ingestor".
if systemctl is-active --quiet dns-ingestor; then
    log_info "Ingestor daemon is running"
else
    log_error "Ingestor daemon not running"
fi

###############################################################################
# 9. OUTPUT SUMMARY
###############################################################################
# Operator-facing summary of configured endpoints, paths, and next steps.
# The text below is emitted verbatim — keep it in sync with the steps above.
echo -e "\n${GREEN}================================${NC}"
echo -e "${GREEN}Installation Complete!${NC}"
echo -e "${GREEN}================================${NC}\n"

echo -e "${BLUE}Configuration Summary:${NC}"
echo -e "  BIND9 IP:     $BIND_IP:$BIND_PORT"
echo -e "  DB User:      $DB_USER"
echo -e "  DB Name:      $DB_NAME"
echo -e "  Ingestor:     $INGESTOR_DIR"
echo -e "  Logs:         $LOG_DIR"

echo -e "\n${BLUE}Next Steps:${NC}"
echo -e "  1. Test DNS queries:"
echo -e "     dig @127.0.0.1 google.com A +short"
echo -e "  2. Check ingestor status:"
echo -e "     sudo journalctl -u dns-ingestor -f"
echo -e "  3. Query database after 7 days:"
echo -e "     psql -U $DB_USER -d $DB_NAME -h localhost"
echo -e "     SELECT COUNT(*) FROM dns_queries;"
echo -e "  4. View teaching guide:"
echo -e "     Open index.html in your browser"

echo -e "\n${BLUE}Documentation:${NC}"
echo -e "  README.md                 - Architecture & learning path"
echo -e "  CLOUDFLARE_DEPLOYMENT.md  - Deployment to Cloudflare Pages + Workers"
echo -e "  index.html                - Full teaching guide (open in browser)"

echo -e "\n${GREEN}Happy analyzing! [rocket]${NC}\n"




