Subversion Repositories develop

Rev

Rev 7 | Blame | Compare with Previous | Last modification | View Log | RSS feed

#!/bin/bash

# duplikaty.bash - szuka duplikatow plikow

# 2007-01-80
# Zalozenia:
# potrafi przegladac podany katalog (rekurencyjnie w glab) 
# dodawac pliki do bazy w sqlite
# struktura rekordu:
# int ID
# char(32) md5
# char name
# char path
# datetime timestamp

# SQLite Tutorial: Common Commands and Triggers
# http://linuxgazette.net/109/chirico1.html

# Making SQLITE/SQLITE3 executable scripts.
# http://www.bigbold.com/snippets/posts/show/3080

# --- Defaults ---------------------------------------------------------------
# Unquoted expansions are split on newlines only.
IFS=$'\n'

# Command used to run the SQLite command-line client.
sqlite='sqlite3'

# Database file name and the directory that holds it.
sql_db_path='/home/jaqb/develop'
sql_db_name='duplikaty.db'

# Log file that receives progress and report output.
logfile='/tmp/duplikaty.log'

#sql_cmd="$sqlite ${sql_db}"


# Print a one-line description of the program followed by an example
# invocation (built from $0) on standard output.
usage() {
  printf '%s\n' \
    "Program szuka wszystkich plików w podanym katalogu i jego podkatalogach" \
    "$0 --db_name duplikaty.db --db_path /tmp /sciezka/do/katalogu"
}

# dodaj_katalog DIR BASE
#
# Recursively walk DIR and insert one row per regular file into the
# "files" table (md5, name, path, timestamp).
#
# Arguments:
#   $1 - directory to scan
#   $2 - base directory put in front of relative paths when building the
#        stored path
# Globals read:
#   sqlite  - SQLite client command
#   sql_db  - database file
#   logfile - file that receives one "Katalog ..." line per visited directory
#
# Symbolic links are skipped, whether they point at files or directories.
dodaj_katalog() {
  # Everything is local: the function calls itself, and shared globals
  # would be overwritten by the nested call.
  local kat=$1
  local kat_n=$2
  local x plik md5 pelna sciezka
  local q="'"

  echo "Katalog $kat ($kat_n)" >> "$logfile"
  for x in "${kat}"/* ; do
    # Never index or descend into symbolic links.
    if [ -L "$x" ] ; then
      continue
    fi

    if [ -f "$x" ] ; then
      plik=${x##*/}
      # Feed the file on stdin so md5sum never prints (and escapes) its name;
      # the digest is the first 32 characters of the output.
      md5=$(md5sum < "$x" | cut -c 1-32)

      # An absolute scan path is already complete; only a relative one
      # needs the base directory in front of it.
      case "$x" in
        /*) pelna=$x ;;
        *)  pelna="${kat_n}/${x}" ;;
      esac
      # Collapse "/./" segments, then keep only the directory part.
      sciezka=$(printf '%s\n' "$pelna" | sed 's/\/\.\//\//g')
      sciezka=$(dirname -- "$sciezka")

      # Double every embedded single quote so that names such as
      # "it's.txt" stay inside the SQL string literal.
      plik=${plik//$q/$q$q}
      sciezka=${sciezka//$q/$q$q}

      "$sqlite" "${sql_db}" " INSERT INTO files (md5, name, path, timestamp) VALUES ( '$md5', '$plik', '$sciezka', DATETIME('NOW')); "
    elif [ -d "$x" ] ; then
      dodaj_katalog "$x" "$kat_n"
    fi
  done
}

# --- Command-line handling --------------------------------------------------
# Accepted form: [--db_name NAME] [--db_path DIR] DIRECTORY   (or --help)

# At least one argument is required; report the problem on stderr and
# signal failure through the exit status.
if [ $# -lt 1 ] ; then
  echo "Za malo paramerow" >&2
  exit 1
fi

# Options are consumed in pairs while more than two arguments remain, so the
# directory to scan is expected in $1 once the loop ends.
while [ $# -gt 2 ] ; do
  echo "Liczba parametrow $#"
  case "$1" in
    --db_name)
      sql_db_name=$2
      shift 2
      ;;
    --db_path)
      # ${2%/} drops one trailing slash, if there is one.
      sql_db_path=${2%/}
      shift 2
      ;;
    --help)
      usage
      exit
      ;;
    *)
      echo "Zła opcja: $1" >&2
      exit 1
      ;;
  esac
done

# The loop above never runs for one or two arguments, so --help is checked
# again here.
if [ "$1" == "--help" ] ; then
  usage
  exit
fi

# Stop early when what is left is not a directory (for example an option
# given without its value or without a directory after it).
if [ ! -d "$1" ] ; then
  echo "Nie ma takiego katalogu: $1" >&2
  exit 1
fi

# --- Log header and database bootstrap --------------------------------------
# Start this run's log section with a timestamp, the database file and the
# directory argument.
date "+%F %T" >> "$logfile"
sql_db="${sql_db_path}/${sql_db_name}"
echo "Baza = ${sql_db}" >> "$logfile"
echo "Katalog = ${1}" >> "$logfile"

# The database file does not exist yet: log its name and create the schema.
# Without the schema every later statement would fail, so a failure here
# ends the script.
if [ ! -f "${sql_db}" ] ; then
  echo "${sql_db}" >> "$logfile"
  if ! "$sqlite" "${sql_db}" <<WPIS_SQL_1
  create table files (
        ID INTEGER PRIMARY KEY,
        md5 TEXT,
        name TEXT,
        path TEXT,
        timestamp DATE
  );
create unique index ID on files(ID) ;
create index md5_idx on files(md5) ;
WPIS_SQL_1
  then
    echo "Nie mozna utworzyc bazy: ${sql_db}" >&2
    exit 1
  fi
fi

# Highest ID stored so far (first column of the single result row).  The
# INSERT used while scanning does not supply an ID, so nothing reads this
# value at present.
last_ID=$("$sqlite" "${sql_db}" "select max(ID) from files; " | awk -F\| ' { print $1; } ')

# Index the requested directory; relative paths are anchored at the current
# working directory.
dodaj_katalog "$1" "$(pwd)"

# Rebuild the duplicate summary from scratch on every run: create the table
# only when it is missing and empty it first, so repeated runs neither fail
# on "table already exists" nor pile up repeated rows.
"$sqlite" "${sql_db}" "  create table if not exists dups ( md5 TEXT, nbr INTEGER  ); delete from dups;"

# $sqlite ${sql_db} " select * from files order by md5; "
"$sqlite" "${sql_db}" "insert into dups select md5, count(*) as powt from files group by md5 having powt>1; "

# Write the same summary (md5 and number of occurrences) to the log.
# The query is passed quoted so that the "*" in it is never glob-expanded.
query=" select md5, count(*) as powt from files group by md5 having powt>1; "
echo "$sqlite ${sql_db} $query:" >> "$logfile"
"$sqlite" "${sql_db}" "$query" >> "$logfile"