画像数とサイズを調べるスクリプト

あるディレクトリ(フォルダ)から再帰的にディレクトリ(フォルダ)を調べて、各ディレクトリ(フォルダ)に保存されている画像ファイルの数とデータサイズを表示(+CSVでエクスポート)するPythonスクリプトです。


ファイルサーバの容量が必要のない画像で圧迫されていてどうしたものか、という会社の課題に直面し、突貫ですが書いてみました。

どれくらい画像がリソースを使っているのか現実を見るとびっくりしてしまいますが、実際にこのうち何パーセントの画像が業務で必要とされているのか・・・



GRAPHIC_EXTSリストの中を書き換えればExcelファイルだけを探したりもできます。

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os.path
import os

# File extensions treated as graphics. Every entry must include the leading
# dot, because entries are compared against the extension returned by
# os.path.splitext(), which always starts with "." when it is non-empty.
# Replace the entries to audit other file types (e.g. spreadsheets).
GRAPHIC_EXTS = ['.jpg', '.JPG',
             '.gif', '.GIF',
             '.bmp', '.BMP',
             '.png', '.PNG',
             '.ai', '.AI',
             '.eps', '.EPS',
             '.tif', '.TIF',
             '.psd', '.PSD',
             '.ps', '.PS',
             '.tiff', '.TIFF'
             ]

def get_root_path():
  """Prompt on the console until the user enters an existing path.

  Backslashes in the answer are converted to forward slashes before the
  existence check. The normalised path is returned.
  """
  while True:
    candidate = raw_input("Enter the root path > ").replace("\\", "/")
    if os.path.exists(candidate):
      return candidate
    # Single parenthesised argument: prints the same text under Python 2.
    print("!!! " + candidate + " does not exist! Try Again !!!")

def search_directory(root_path = "c:/", exts = None):
  """Walk root_path recursively and tally matching files per directory.

  Args:
    root_path: directory where the recursive walk starts.
    exts: iterable of extensions (with leading dot) to count. When None,
      the module-level GRAPHIC_EXTS list is used. Matching ignores case.

  Returns:
    A tuple (graphic_dirs, graphic_size) of dicts keyed by directory path:
    graphic_dirs maps to the number of matching files directly inside that
    directory, graphic_size to their combined size in bytes. Directories
    without any matching file do not appear in either dict.
  """
  if exts is None:
    exts = GRAPHIC_EXTS
  # Normalise once so that ".Jpg", ".JPG" and ".jpg" are all recognised.
  wanted = set(ext.lower() for ext in exts)
  graphic_dirs = {}
  graphic_size = {}
  for root, dirs, files in os.walk(root_path):
    for name in files:
      ext = os.path.splitext(name)[1]
      if ext.lower() not in wanted:
        continue
      file_path = os.path.join(root, name)
      # Accumulate instead of resetting the entry for every file; resetting
      # made the reported size equal to the last matching file only.
      graphic_dirs[root] = graphic_dirs.get(root, 0) + 1
      graphic_size[root] = graphic_size.get(root, 0) + os.path.getsize(file_path)
  return graphic_dirs, graphic_size


def byte2mega(byte):
  """Convert a size in bytes to megabytes (1 MB = 1024 * 1024 bytes), rounded to two decimals."""
  kilo = byte / 1024.0
  return round(kilo / 1024.0, 2)

def calc_total(graphic_dirs, graphic_size):
  """Return (total file count, total size) summed over all directories.

  graphic_dirs maps directory -> file count and graphic_size maps
  directory -> size; each dict is summed independently.
  """
  file_sum = sum(graphic_dirs.values())
  size_sum = sum(graphic_size.values())
  return file_sum, size_sum


def write_to_csv(graphic_dirs, graphic_size):
  """Ask for a target directory and write the audit result there as CSV.

  The file is named "result_audit.csv" and has one header row followed by
  one row per directory (sorted by path): file count, size in MB, path.

  Args:
    graphic_dirs: dict mapping directory path -> number of files.
    graphic_size: dict mapping directory path -> size in bytes.
  """
  # Local import keeps this block self-contained; csv is standard library.
  import csv

  csv_directory = ''
  csv_file_name = "result_audit.csv"
  print("Where do you want to save the csv?")
  # isdir (not exists) so that an existing *file* path is re-prompted
  # instead of failing later when used as a directory.
  while not os.path.isdir(csv_directory):
    csv_directory = raw_input("Enter> ").replace("\\", "/")
  # Build the full path instead of os.chdir(): changing the process-wide
  # working directory would alter how relative paths entered on later
  # runs are resolved.
  csv_path = os.path.join(csv_directory, csv_file_name)
  # "with" guarantees the file is closed even if a write fails.
  with open(csv_path, 'w') as csvfile:
    # csv.writer quotes fields containing commas/quotes, so directory names
    # with commas no longer break the column layout. "\n" keeps the same
    # line endings the hand-built records produced in text mode.
    writer = csv.writer(csvfile, lineterminator="\n")
    writer.writerow(["numbers of files", "size of directory", "directory"])
    for key in sorted(graphic_dirs):
      writer.writerow([graphic_dirs[key],
                       str(byte2mega(graphic_size[key])) + "MB",
                       key])

  print("CSV file has been saved in " + csv_directory + "/" + csv_file_name)


def _ask_yes_no(question):
  """Print question, then prompt until the answer is exactly "yes" or "no"; return it."""
  print(question)
  answer = ""
  while answer not in ("yes", "no"):
    answer = raw_input("(yes | no) > ")
  return answer


def main():
  """Run the interactive audit: scan, report, optionally export, optionally repeat.

  Each round asks for a root path, prints the per-directory file count and
  size (MB) followed by the totals, offers a CSV export, and asks whether
  to run another round.
  """
  # A loop replaces the former self-recursive call, so repeating the audit
  # many times no longer deepens the call stack.
  while True:
    root_path = get_root_path()
    graphic_dirs, graphic_size = search_directory(root_path)
    for key in sorted(graphic_dirs):
      print(key + " -> " + str(graphic_dirs[key]) + " -> " +
            str(byte2mega(graphic_size[key])) + "MB")
    total_files, total_size = calc_total(graphic_dirs, graphic_size)
    print("*" * 79)
    print("Numbers of Files Checked : " + str(total_files))
    print("File Sizes Checked : " + str(byte2mega(total_size)) + "MB")
    print("*" * 79)
    if _ask_yes_no("Do you want to export this list to csv?") == "yes":
      write_to_csv(graphic_dirs, graphic_size)
    if _ask_yes_no("Do you want to continue?") != "yes":
      break
  print("Good Bye!")

# Start the interactive audit only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
  main()