#!/usr/bin/perl

# Henri Yandell
# 2004-05-19
# Happy 18 weeks little foetus

# Based on the mt2blojsom.pl script: http://wiki.blojsom.com/wiki/download/attachments/140/mt2blojsom.pl

# BODY and EXTENDED BODY both map into weblogentry.text
# TITLE maps to weblogentry.title
# STATUS: publish maps to weblogentry.publish of 1 (check)
# DATE: maps to weblogentry.pubtime

# COMMENT: kicks off a comment
  # AUTHOR maps to comment.name
  # EMAIL maps to comment.email
  # URL maps to comment.url
  # DATE maps to comment.posttime
  # text following DATE maps to comment.content

use Time::Local;

use strict;
use warnings;

# Converts a Movable Type export file into SQL INSERT statements for the
# weblogcategory, weblogentry and comment tables.  The SQL is printed to
# the currently selected output handle (STDOUT by default).
#
# Usage: script <mt-export-file> [output-dir]
#   The second argument is accepted for backward compatibility but is not
#   used by anything in this script.

# Find out the websiteid:  40288d96fc909c9800fc909d715a0003
# Every generated row is attached to this website id; edit before running.
our $websiteid = "40288d96fc909c9800fc909d715a0003";

# Run the conversion only when executed as a script, so the helper subs
# below can be loaded with require/do without touching @ARGV or any file.
main(@ARGV) unless caller;

# Escape a value for use inside a single-quoted SQL string literal.
# The generated SQL uses backslash escaping for quotes, so backslashes in
# the data must be doubled first or they would swallow the next character.
# An undefined value is treated as the empty string.
sub sql_escape {
    my ($value) = @_;
    $value = '' unless defined $value;
    $value =~ s/\\/\\\\/g;
    $value =~ s/'/\\'/g;
    return $value;
}

# Build the entry anchor from its title: non-word characters become
# separators, everything is lower-cased, and at most the first four words
# are joined with underscores.
sub make_anchor {
    my ($title) = @_;
    my $anchor = $title;
    $anchor =~ s/\W/ /g;
    $anchor = lc $anchor;

    my @words = split ' ', $anchor;
    $#words = 3 if $#words > 3;
    return join '_', @words;
}

# Turn the pieces of an export date (MM/DD/YYYY hh:mm:ss AM|PM) into a
# 14-digit YYYYMMDDhhmmss value, e.g. 20040518230256.
# 12 AM is midnight (hour 00) and 12 PM is noon (hour 12); any other PM
# hour is shifted by twelve.  Whitespace around the meridian is ignored.
sub mt_timestamp {
    my ($month, $day, $year, $hour, $min, $sec, $meridian) = @_;
    $meridian = '' unless defined $meridian;
    $meridian =~ s/\s+//g;

    if ($meridian eq 'PM' && $hour < 12) {
        $hour += 12;
    }
    elsif ($meridian eq 'AM' && $hour == 12) {
        $hour = 0;
    }

    return sprintf '%04d%02d%02d%02d%02d%02d',
        $year, $month, $day, $hour, $min, $sec;
}

# Read the export file named by the first argument and print the SQL.
# Dies if no input file is given or if it cannot be opened.
#
# NOTE(review): key and separator patterns are unanchored, exactly as in
# the original script, and PING sections are not treated specially, so a
# TITLE:/URL:/DATE: line inside a ping or excerpt is parsed as if it
# belonged to the entry.  Confirm against real export data before relying
# on this for blogs that have trackbacks.
sub main {
    my ($input, $output_dir) = @_;

    die "Usage: $0 <mt-export-file> [output-dir]\n"
        unless defined $input && length $input;

    open my $in, '<', $input
        or die "Could not open input file $input: $!\n";

    # Find out the max id for:  comment.id, weblogcategory.id, weblogentry.id
    my $weblogentryId    = 1;
    my $weblogcategoryId = 1;
    my $commentId        = 1;

    # Table-state: BLOG COMMENT
    my $tableState = 'BLOG';
    # States: TEXT NEXTLINE
    my $state = 'NEXTLINE';

    my %categories;    # category name => weblogcategory id

    # Fields of the entry currently being read.  $author is parsed but the
    # weblogentry INSERT has no column for it.
    my ($author, $date, $categoryId, $title, $anchor, $publish) = ('') x 6;
    my $text = '';

    # Fields of the comment currently being read.
    my ($c_author, $c_date, $c_email, $c_url, $c_text) = ('') x 5;

    while (my $line = <$in>) {
        chomp $line;
        # Drop the CR of CRLF input so field values and the AM/PM marker
        # compare cleanly.
        $line =~ s/\r\z//;

        if ($state eq 'TEXT') {
            if ($line =~ /-----/) {
                if ($tableState eq 'COMMENT') {
                    # Time to insert into the comment table
                    my $sql_author = sql_escape($c_author);
                    my $sql_email  = sql_escape($c_email);
                    my $sql_url    = sql_escape($c_url);
                    my $sql_text   = sql_escape($c_text);
                    print "INSERT INTO comment VALUES('$commentId', '$weblogentryId', '$sql_author', '$sql_email', '$sql_url', '$sql_text', $c_date);\n";
                    $commentId++;
                    $c_text     = '';
                    $tableState = 'BLOG';
                }
                $state = 'NEXTLINE';
            }
            else {
                # Every line of body text ends with an HTML line break;
                # stray carriage returns inside the line count as breaks too.
                my $html = $line;
                $html =~ s/\r/<br\/>/g;
                $html .= '<br/>';
                if ($tableState eq 'COMMENT') {
                    $c_text .= $html;
                }
                else {
                    $text .= $html;
                }
            }
            next;
        }

        if ($line =~ /--------/) {
            # Time to insert into the blog table
            my $sql_anchor = sql_escape($anchor);
            my $sql_title  = sql_escape($title);
            my $sql_text   = sql_escape($text);
            print "INSERT INTO weblogentry VALUES('$weblogentryId', '$sql_anchor', '$sql_title', '$sql_text', $date, $date, '$websiteid', '$categoryId', $publish);\n";
            $weblogentryId++;
            $text       = '';
            $tableState = 'BLOG';
        }

        # "EXTENDED BODY:" also contains "BODY:", so one test covers both.
        if ($line =~ /BODY:/) {
            $state = 'TEXT';
        }

        if ($line =~ /COMMENT:/) {
            $tableState = 'COMMENT';
            # Start from a clean slate so a comment that lacks a field does
            # not inherit the previous comment's value.
            ($c_author, $c_date, $c_email, $c_url, $c_text) = ('') x 5;
            next;
        }

        if ($line =~ /AUTHOR:\s(.*)/) {
            # String comparison: the numeric == used previously was true
            # for every state, so the entry author landed in $c_author.
            if ($tableState eq 'COMMENT') {
                $c_author = $1;
            }
            else {
                $author = $1;
            }
        }

        if ($line =~ /EMAIL:\s(.*)/) {
            $c_email = $1;
        }

        if ($line =~ /URL:\s(.*)/) {
            $c_url = $1;
        }

        if ($line =~ /STATUS: publish/) {
            $publish = 1;
        }

        if ($line =~ /STATUS: draft/) {
            $publish = 0;
        }

        if ($line =~ /TITLE:\s(.*)/) {
            $title  = $1;
            $anchor = make_anchor($title);
        }

        if ($line =~ /PRIMARY CATEGORY:\s(.*)/) {
            my $category = $1;
            $category = 'undefined' if $category eq '';

            if (defined $categories{$category}) {
                $categoryId = $categories{$category};
            }
            else {
                # First time this category is seen: emit its row.
                $categoryId = $weblogcategoryId++;
                $categories{$category} = $categoryId;
                my $sql_category = sql_escape($category);
                print "INSERT INTO weblogcategory VALUES('$categoryId', '$sql_category', NULL, '$websiteid', NULL);\n";
            }
        }

        if ($line =~ /DATE:\s(\d\d)\/(\d\d)\/(\d\d\d\d)\s(\d\d):(\d\d):(\d\d)\s(.*)/) {
            my $stamp = mt_timestamp($1, $2, $3, $4, $5, $6, $7);    # 20040518230256

            if ($tableState eq 'COMMENT') {
                # In a comment the text starts right after the DATE line.
                $state  = 'TEXT';
                $c_date = $stamp;
            }
            else {
                $date = $stamp;
            }
        }
    }

    close $in;
    return;
}

# True value so the file can also be loaded with require.
1;
