#!/usr/bin/perl -w # # ddl-to-graph - converts SQL DDL into a graph specification for dot(1) # # Author: Tom Moertel # 31 Jan 2001 # # $Id: ddl-to-graph,v 1.3 2002/06/19 02:03:39 thor Exp $ # # # See END for docs. # # # LICENSE # # Copyright (C) 2001 Thomas Moertel # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # The text of the GNU GPL may be found online: # http://www.gnu.org/copyleft/gpl.html # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # Except as provided for in the GNU GPL, all rights are reserved # worldwide. # # # Modification history # # 31 Jan 2001 TGM - original # use strict; # config ------------------------------------------------------------------ my $FONT = "Times-Roman"; # main --------------------------------------------------------------------- # PARSE OPTIONS my $opt_guess_fks = @ARGV && $ARGV[0] =~ /--?g/ ? shift : 0; # FIRST HALF: PARSE THE SQL DDL my %tables; my $ddl = lc(slurp_everything()); # remove special characters like [ and ] that MS Access & SQL Server use # and generally soften up the SQL for the pattern matching to come $ddl =~ tr/[]//d; # remove brackets $ddl =~ tr/./_/; # convert dots to underbars $ddl =~ s/\s+\(/\(/g; # remove white space before parens # parse SQL data-definition language, one "create table" at a time while ($ddl =~ /create\s+table\s+ # "create table" ([a-z0-9_]+) # table name .*? # anything (minimal match) \( # "(" (.*?) # sql code (minimal match) \)\s*(?:[^)\r\n]*;|on|go) # ") xxx ;" or ") on" or ") go" /xsig) { # we have a create table statement => create a Table object from it my ($tablename, $tablesql) = ($1, $2); my $table = new Table $tablename; # pluck out the primary key (we handle compound keys, too) $table->pk($1) while $tablesql =~ /primary\s+key[^\(]*\(\s*([a-z0-9_ ,]+)\s*\)/sig; # and any foreign keys (compound, too) $table->add_fk($1, $2, $3, \%tables) while $tablesql =~ /foreign\s+key[^\(]* # leading syntax \(\s*([a-z0-9_ ,]+)\s*\)\s* # field(s) references\s+(\w+) # other table (?:\s*\(\s*(\w+)\s*\))?/xmig; # optional field(s) # add the fields while ($tablesql =~ /^(.*)$/mg) { my $line = $1; next if $line =~ /^\s*primary\s+key/i; # skip primary key decls next if $line =~ /^\s*foreign\s+key/i; # and foreign keys decls next if $line =~ /^\s*check\s*\(/i; # and check constraints next if $line =~ /^\s*--/; # and comments $table->add_field($1, $2) if $line =~ /(\w+)\s+([A-Za-z0-9]+(?:\([^\)]*\)))/; } # finally, add the fully loaded Table object to our hash of tables $tables{lc $tablename} = $table; } # parse through SQL again, looking for ALTER TABLE statements that add # primary keys or foreign keys while ($ddl =~ /alter\s+table\s+ # start of A.T. statement ([a-z0-9_]+) # table name (.*?) # body (minimal match) (?:go|;) # end of statement /xsig) { my ($tablename, $body) = (lc $1, lc $2); my $table = $tables{$tablename}; unless ($table) { print STDERR "warning: table $tablename is unknown\n"; next; } # look for primary key $table->pk($1) while $body =~ /primary\s+key[^(]*\(([^)]+)\)/mgi; # and foreign keys $table->add_fk($1, $2, $3, \%tables) while $body =~ /foreign\s+key[^\(]* # leading syntax \(\s*([a-z0-9_ ,]+)\s*\)\s* # field(s) references\s+(\w+) # other table (?:\s*\(\s*(\w+)\s*\))?/xmig; # optional field(s) } # If requested, guess foreign keys via naming conventions if ($opt_guess_fks) { # build a mapping from primary keys to the owning tables my %pk_tables; while (my ($tablename, $table) = each %tables) { my $pk = $table->pk; $pk_tables{$pk} = $tablename if $pk; } # scan all tables for fields names that match pk names and add # foreign keys for matches while (my ($tablename, $table) = each %tables) { my $pk = $table->pk || ""; my @fields = keys %{$table->fields}; foreach my $field (@fields) { my $pk_table = $pk_tables{$field}; next unless $pk_table && $pk_table ne $tablename; $table->add_fk($field, $pk_table, $field, \%tables); } } } # SECOND HALF: GENERATE OUTPUT FOR DOT(1) # print initial block of dot input, which sets up the directed graph # and our preferred layout settings print qq( digraph G { ratio=auto; page="8.5,11"; ranksep="1.5"; rankdir=LR; concentrate=true; node [shape=record,fontname="$FONT",fontsize=10]; ); # now, generate output for each table while (my ($tablename, $table) = each %tables) { # grab fields and primary key for easy reference my $fields = $table->fields; my $pk = $table->pk || ""; # for this table ... print( # create a new record node, labeled with the table's name ... "$tablename [ label = \"", join('|', uc $tablename, # followed by the field(s) of the primary key, if any ... (map { "<$_> $_ * : $fields->{$_}" } split(/,/, $pk)), # and then the remaining fields ... (map { "<$_> $_ : $fields->{$_}" } grep { ! pk_componentQ($_, $pk) } sort keys %$fields)), # and then finish off the node "\"];\n"); # finally, add edges for all the foreign keys of this table my $fks = $table->fks; print(map {"$tablename:$_ -> $fks->{$_}[0]:$fks->{$_}[1];\n"} keys %$fks); } # finish off the directed graph print "}\n"; # WE'RE DONE! # subs --------------------------------------------------------------------- sub pk_componentQ { # predicate: tests whether $field is part of a potentially # multi-key primary key $pkey my ($field, $pkey) = @_; return scalar grep {$_ eq $field} split(/,/, $pkey); } sub slurp_everything { # slurp in all awaiting input local $/; # makes $/ undefined, too return <>; } # packages ----------------------------------------------------------------- BEGIN { package Table; sub new { my ($self, $name) = @_; my $class = ref($self) || $self; bless { "name"=>lc($name), "fields"=>{}, pk=>undef, fks=>{} }, $class; } sub add_field { my ($self, $field, $type) = @_; $self->{fields}{lc $field} = lc $type; } sub fields { my ($self, $field) = @_; return $self->{fields}{lc $field} if $field; return $self->{fields}; } sub pk { # primary key my ($self, $pk) = @_; $pk =~ s/\s+//g if $pk; $self->{pk} = lc $pk if $pk; return $self->{pk}; } sub add_fk { # foreign key my ($self, $fk, $table, $field, $tables) = @_; $field ||= $tables->{$table}->pk; # defaults to foreign table's pk # handle compund keys by recursion (fields are separated by commas) $fk =~ s/\s+//g; $field =~ s/\s+//g; my ($fk_rest, $field_rest); $fk_rest = $1 if $fk =~ s/,(.*)//; # trim and save xtra fields, if any $field_rest = $1 if $field =~ s/,(.*)//; # repeat for referenced fields if ($fk_rest || $field_rest) { $self->add_fk($fk_rest || $fk, $table, $field_rest, $tables); } # add fkey info to table my $tuple = [lc $table, lc $field]; $self->{fks}{lc $fk} = $tuple; return $tuple; } sub fks { my ($self, $fk) = @_; return $self->{fks}{lc $fk} if $fk; return $self->{fks}; } 1; } =head1 NAME B - converts SQL data-definition language into dot(1) directed graphs =head1 SYNOPSIS B ddl.sql [--guess-keys] | B B<-Tps> E out.ps =head1 DESCRIPTION B(1) reads through the supplied SQL data-definition language (DDL) and parses the C statements, building up an internal understanding of the tables and the relationships between them. Then the program generates graph output suitable for processing by dot(1), part of the Graphviz toolkit from AT&T Research. Get Graphviz from B =head1 EXPECTED DDL FORMAT B(1) expects tables to be declared similarly to this: C< CREATE TABLE tablename ( id integer not null, -- comment fld1 integer not null, fld2 char(2) not null, fld3 char(64), ... primary key (id), foreign key (fld1) references atable (afield), foreign key (fld2) references atable );> In particular, primary and foreign keys are expected to be declared separately, not in passing as the fields themselves are defined. When the referenced key is not provided in a foreign-key declaration, it is assumed to be the primary key of the referenced table. In the example, C relates to the primary key of C. Each field or key declaration should be on a line by itself. Order is not important, however. Field and key declarations can appear in any order in the table declarations. Case does not matter. The program does the right thing for compound keys. =head1 OPTIONS The B<--guess-keys> option will cause the program to guess which fields are foreign keys based on field names. For example, if one table has a primary key "customer_id" and another table has a field "customer_id", a foreign-key realtionship is deduced. The field's name must match the primary key's name exactly. =head1 LIMITATIONS B relies on pattern-matching and heuristics to parse the SQL DDL. While I have made reasonable attempts to make the parser robust, it will probably accept a lot of code that is not strictly SQL, and it may barf on oddball SQL that is technically legitimate. Line endings in the middle of statement clauses might cause problems. Always check the output for the desired degree of sanity. You have been warned. =head1 BUGS Some versions of dot(1) (1.7beta and prior) had a bug that would cause the edges directed between two fields in the same table to be reversed. If you have any tables that have foreign keys from themselves, be on the lookout for this bug. =head1 LICENSE This software is licensed under the GNU General Public License, as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The text of the GNU GPL may be found online: http://www.gnu.org/copyleft/gpl.html =head1 SEE ALSO dot(1) =head1 AUTHOR Tom Moertel Etom-perl@moertel.comE 31 Jan 2001 $Id: ddl-to-graph,v 1.3 2002/06/19 02:03:39 thor Exp $ =cut