[Source] Duplicate File Detector 12-14-2013, 02:06 PM
#1
The following code detects duplicate files on your machine. I am the author of this code. It still has a slight issue that I'm working on, but it works fine most of the time.
Screenshot :
![[Image: image.png]](http://s16.postimg.org/cfbovt391/image.png)
Technique Used :
It compares the MD5 hash of the file being scanned against the hashes of the files already scanned, to see whether the current file is a duplicate of one scanned earlier. If it is, the file is added to the list of duplicate files; otherwise its MD5 hash is added to a HashMap containing the hashes of all unique files found so far.
Source Code :
DuplicateFileDetector.java
TraverseAndTestFiles.java
Data.java
Regards
Screenshot :
![[Image: image.png]](http://s16.postimg.org/cfbovt391/image.png)
Technique Used :
It compares the MD5 hash of the file being scanned against the hashes of the files already scanned, to see whether the current file is a duplicate of one scanned earlier. If it is, the file is added to the list of duplicate files; otherwise its MD5 hash is added to a HashMap containing the hashes of all unique files found so far.
Source Code :
DuplicateFileDetector.java
Code:
import java.io.*;
import javax.swing.*;
import javax.swing.UIManager.LookAndFeelInfo;
import javax.swing.table.TableColumn;
import java.awt.event.*;
import java.awt.*;
/**
 * Main window of the duplicate file detector.
 *
 * <p>The window lets the user pick a directory, start/pause/stop a scan
 * (performed by a {@link TraverseAndTestFiles} thread), shows the duplicates
 * found in a table and offers a small "delete file by path" helper.
 *
 * <p>The public UI mutators ({@link #clear()}, {@link #reset()},
 * {@link #setProgress(int)}, {@link #setFilesScanned(int)},
 * {@link #setDuplicatesFound(int)}, {@link #currentlyScanning(String)}) may be
 * called from any thread: they hand the actual component update over to the
 * Swing event dispatch thread and return once it has been applied.
 */
public class DuplicateFileDetector extends JFrame implements ActionListener{
    private static final long serialVersionUID = 425524048438044374L;

    /** Number of rows pre-allocated in the result table. */
    private static final int MAX_ROWS=1000;

    private JLabel pathLabel,resultLabel,totalScannedLabel,duplicatesFoundLabel,currentLabel,delLabel;
    private JTextField pathField,delField;
    private JButton browseButton,startButton,pauseButton,stopButton,delButton,clearButton;
    private JTable resultTable;
    private JScrollPane tableScroll;
    private JProgressBar progressbar;
    private String columns[]={"Sl. No.","First Encounter","Current Encounter"};
    private String data[][]=new String[MAX_ROWS][3];
    private TraverseAndTestFiles tatf;

    /**
     * Builds the (absolutely positioned) UI, applies the theme and shows the window.
     */
    public DuplicateFileDetector() {
        super("Duplicate File Detector");
        setDefaultCloseOperation(DISPOSE_ON_CLOSE);
        setLayout(null);
        setSize(550,600);
        setResizable(false);

        pathLabel=new JLabel("Scan Location");
        pathLabel.setBounds(20, 40, 100, 30);
        resultLabel=new JLabel("Scan Result");
        resultLabel.setBounds(20, 120, 100, 30);
        totalScannedLabel=new JLabel("Total Files Scanned : ");
        totalScannedLabel.setBounds(20,470,510,30);
        duplicatesFoundLabel=new JLabel("Duplicates Found : ");
        duplicatesFoundLabel.setBounds(20,490,510,30);
        currentLabel=new JLabel("");
        currentLabel.setBounds(10,530,530,30);
        delLabel=new JLabel("Delete File");
        delLabel.setBounds(20, 430, 100, 30);

        pathField=new JTextField();
        pathField.setBounds(120, 40, 200, 30);
        delField=new JTextField();
        delField.setBounds(120, 430, 200, 30);

        browseButton=new JButton("Browse");
        browseButton.setBounds(350,40,100,30);
        browseButton.addActionListener(this);
        delButton=new JButton("Delete Now");
        delButton.setBounds(330, 430, 100, 30);
        delButton.addActionListener(this);
        clearButton=new JButton("Clear");
        clearButton.setBounds(430, 430, 70, 30);
        clearButton.addActionListener(this);
        startButton=new JButton("Start");
        startButton.setBounds(120, 80, 100, 30);
        startButton.addActionListener(this);
        pauseButton=new JButton("Pause");
        pauseButton.setBounds(230, 80, 100, 30);
        pauseButton.addActionListener(this);
        pauseButton.setEnabled(false);
        stopButton=new JButton("Stop");
        stopButton.setBounds(340,80,100,30);
        stopButton.addActionListener(this);
        stopButton.setEnabled(false);

        resultTable=new JTable(data,columns);
        resultTable.setShowGrid(false);
        resultTable.setCellSelectionEnabled(false);
        TableColumn ct=resultTable.getColumnModel().getColumn(0);
        ct.setPreferredWidth(1);
        ct=resultTable.getColumnModel().getColumn(1);
        ct.setPreferredWidth(150);
        ct=resultTable.getColumnModel().getColumn(2);
        ct.setPreferredWidth(150);
        tableScroll=new JScrollPane(resultTable);
        tableScroll.setBounds(20,150,480,200);

        progressbar=new JProgressBar();
        progressbar.setBounds(20,370,480,30);
        progressbar.setStringPainted(true);
        progressbar.setString("Inactive");

        add(pathLabel);
        add(pathField);
        add(browseButton);
        add(resultLabel);
        add(startButton);
        add(pauseButton);
        add(stopButton);
        add(tableScroll);
        add(progressbar);
        add(totalScannedLabel);
        add(duplicatesFoundLabel);
        add(currentLabel);
        add(delLabel);
        add(delButton);
        add(delField);
        add(clearButton);
        setTheme();
        setVisible(true);
    }

    /**
     * Dispatches button clicks: browse for a directory, start/pause/resume/stop
     * the scan, delete the file named in the delete field, or clear that field.
     *
     * @param e the action event fired by one of this window's buttons
     */
    @Override
    public void actionPerformed(ActionEvent e) {
        Object event=e.getSource();
        if(event==browseButton) {
            JFileChooser jfc=new JFileChooser();
            jfc.setFileSelectionMode(JFileChooser.DIRECTORIES_ONLY);
            int ret=jfc.showOpenDialog(this);
            if(ret==JFileChooser.APPROVE_OPTION) {
                File f=jfc.getSelectedFile();
                String s=f.getPath();
                pathField.setText(s);
            }
        }
        else if(event==startButton) {
            String s=pathField.getText();
            File f=new File(s);
            // Only a directory can be scanned. Accepting any existing path would
            // start a scan on a regular file, which has nothing to traverse.
            if(f.isDirectory()) {
                tatf=new TraverseAndTestFiles(s,resultTable,this);
                tatf.start();
                pauseButton.setEnabled(true);
                stopButton.setEnabled(true);
                browseButton.setEnabled(false);
                pathField.setEnabled(false);
                startButton.setEnabled(false);
            }
        }
        else if(event==pauseButton) {
            if(!tatf.isPaused()) {
                tatf.setPause(true);
                pauseButton.setText("Resume");
                // Stop is disabled while paused; the scan must be resumed first.
                stopButton.setEnabled(false);
            }
            else {
                tatf.setPause(false);
                pauseButton.setText("Pause");
                stopButton.setEnabled(true);
            }
        }
        else if(event==stopButton) {
            tatf.setStopped();
            startButton.setEnabled(true);
            stopButton.setEnabled(false);
            pauseButton.setEnabled(false);
            pathField.setEnabled(true);
            browseButton.setEnabled(true);
        }
        else if(event==delButton) {
            File f=new File(delField.getText());
            if(f.exists()) {
                if(f.delete()) {
                    delField.setText("File Deleted!");
                }
                else {
                    delField.setText("File could not be deleted!");
                }
            }
            else {
                delField.setText("The file does not exist!");
            }
        }
        else if(event==clearButton) {
            delField.setText("");
        }
    }

    /**
     * Resets the progress bar to 0% and blanks every cell of the result table.
     * Safe to call from any thread; returns after the table has been cleared.
     */
    public void clear() {
        runOnEdt(new Runnable() {
            @Override
            public void run() {
                setProgress(0);
                int rows=resultTable.getRowCount();
                int cols=resultTable.getColumnCount();
                for(int i=0;i<rows;i++) {
                    for(int j=0;j<cols;j++) {
                        resultTable.setValueAt("", i, j);
                    }
                }
            }
        });
    }

    /**
     * Puts the controls back into the "ready to scan" state: path field, Start
     * and Browse enabled; Pause and Stop disabled. Safe to call from any thread.
     */
    public void reset() {
        runOnEdt(new Runnable() {
            @Override
            public void run() {
                pathField.setEnabled(true);
                startButton.setEnabled(true);
                pauseButton.setEnabled(false);
                stopButton.setEnabled(false);
                browseButton.setEnabled(true);
            }
        });
    }

    /**
     * Shows the scan progress. Values below 100 are rendered as
     * "N% completed"; 100 or more is rendered as "Scanning Completed".
     * Safe to call from any thread.
     *
     * @param prog progress percentage
     */
    public void setProgress(final int prog) {
        runOnEdt(new Runnable() {
            @Override
            public void run() {
                progressbar.setValue(prog);
                if(prog<100) {
                    progressbar.setString(prog + "% completed");
                }
                else {
                    progressbar.setString("Scanning Completed");
                }
            }
        });
    }

    /**
     * Updates the "Total Files Scanned" label. Safe to call from any thread.
     *
     * @param value number of files scanned so far
     */
    public void setFilesScanned(final int value) {
        runOnEdt(new Runnable() {
            @Override
            public void run() {
                totalScannedLabel.setText("Total Files Scanned : "+value);
            }
        });
    }

    /**
     * Updates the "Duplicates Found" label. Safe to call from any thread.
     *
     * @param value number of duplicates found so far
     */
    public void setDuplicatesFound(final int value) {
        runOnEdt(new Runnable() {
            @Override
            public void run() {
                duplicatesFoundLabel.setText("Duplicates Found : "+value);
            }
        });
    }

    /**
     * Shows the given text in the status line at the bottom of the window.
     * Safe to call from any thread.
     *
     * @param file text to display (the scanner passes the path being scanned)
     */
    public void currentlyScanning(final String file) {
        runOnEdt(new Runnable() {
            @Override
            public void run() {
                currentLabel.setText(file);
            }
        });
    }

    /**
     * Switches to the Nimbus look and feel when it is installed, overrides three
     * of its base colors and refreshes this window's component tree.
     */
    public void setTheme() {
        try {
            for (LookAndFeelInfo info : UIManager.getInstalledLookAndFeels()) {
                if ("Nimbus".equals(info.getName())) {
                    UIManager.setLookAndFeel(info.getClassName());
                    break;
                }
            }
        }
        catch (Exception ignored) {
            // Deliberate best effort: if Nimbus cannot be applied the window
            // simply keeps the look and feel that is already active.
        }
        UIManager.put("nimbusBase", new Color(255,150,100));
        UIManager.put("nimbusBlueGrey", new Color(255,200,150));
        UIManager.put("control", new Color(255,255,225));
        SwingUtilities.updateComponentTreeUI(this);
    }

    /**
     * Runs {@code task} on the event dispatch thread and waits for it to finish.
     * When already on that thread the task is run directly. Waiting keeps the
     * call synchronous for callers on worker threads, so their subsequent
     * actions still happen after the UI update.
     */
    private static void runOnEdt(Runnable task) {
        if(SwingUtilities.isEventDispatchThread()) {
            task.run();
            return;
        }
        try {
            SwingUtilities.invokeAndWait(task);
        }
        catch(InterruptedException e) {
            // Preserve the interrupt for the calling worker thread.
            Thread.currentThread().interrupt();
        }
        catch(java.lang.reflect.InvocationTargetException e) {
            throw new IllegalStateException("UI update failed", e.getCause());
        }
    }

    /**
     * Application entry point: creates the window on the event dispatch thread.
     *
     * @param args ignored
     */
    public static void main(String args[]) {
        SwingUtilities.invokeLater(new Runnable() {
            @Override
            public void run() {
                new DuplicateFileDetector();
            }
        });
    }
}
TraverseAndTestFiles.java
Code:
import java.io.*;
import java.util.*;
import javax.swing.*;
/**
 * Worker thread that walks a directory tree breadth-first, feeds every regular
 * file to a {@link Data} instance for duplicate detection and reports progress
 * to the owning {@link DuplicateFileDetector} window.
 *
 * <p>The scan can be paused/resumed with {@link #setPause(boolean)} and
 * cancelled with {@link #setStopped()}. Those methods are called from other
 * threads, so every field shared between threads is {@code volatile}.
 */
public class TraverseAndTestFiles extends Thread{
    /** How a scan ended; decides which final UI updates are performed. */
    private enum ScanResult { COMPLETED, STOPPED, TABLE_FULL }

    private Data d;
    private DuplicateFileDetector dfd;
    /** Root of the scan. */
    private File f;
    // The fields below are written by one thread and read by another
    // (scanner thread, progress thread, caller of setPause/setStopped).
    // The counters have a single writer (the scanner thread), so the
    // non-atomic "+=" / "++" on them is safe.
    private volatile File currentFile;
    private volatile boolean paused,stop,resetAtResume;
    private volatile long totalSize,progressedSize;
    private volatile int filesScanned;

    /**
     * Prepares a scan of {@code path}. Nothing is read from disk here; the
     * total size of the tree is computed when the thread runs, so constructing
     * the scanner does not block the caller.
     *
     * @param path directory to scan
     * @param data table that receives the duplicate rows (handed to {@link Data})
     * @param dfd  window that receives progress and state updates
     */
    public TraverseAndTestFiles(String path,JTable data,DuplicateFileDetector dfd) {
        f=new File(path);
        currentFile=f;
        d=new Data(data,this);
        this.dfd=dfd;
        totalSize=0;
        progressedSize=0;
        paused=false;
        stop=false;
        resetAtResume=false;
    }

    /**
     * Runs the scan: clears the UI, sizes the tree, traverses it while a
     * {@link ProgressUpdate} thread refreshes the window, then publishes the
     * final state. The progress thread is always stopped before the final
     * state is written so it cannot overwrite it.
     */
    @Override
    public void run() {
        dfd.setDuplicatesFound(0);
        dfd.clear();
        filesScanned=0;
        totalSize=new DirectorySize().getDirectorySize(f);
        ProgressUpdate pu=new ProgressUpdate();
        pu.start();
        ScanResult result;
        try {
            result=scan();
        }
        finally {
            stopUpdater(pu);
        }
        if(result==ScanResult.TABLE_FULL) {
            // The result table is full: hand the controls back to the user.
            // The window offers no way to resume after this reset, so the
            // scan ends here instead of sleeping in the pause loop forever.
            dfd.reset();
            return;
        }
        dfd.setProgress(100);
        if(result==ScanResult.STOPPED) {
            return;
        }
        dfd.currentlyScanning("Done!");
        dfd.reset();
        d.reset();
    }

    /**
     * Breadth-first traversal starting at the root. Directories that cannot be
     * listed (including a root that is not a directory) are skipped.
     *
     * @return how the traversal ended
     */
    private ScanResult scan() {
        Deque<File> pending=new ArrayDeque<File>();
        pending.add(f);
        while(!pending.isEmpty()) {
            File[] list=pending.poll().listFiles();
            if(list==null)
                continue;
            for(int i=0;i<list.length;i++) {
                while(paused && !stop) {
                    if(resetAtResume) {
                        resetAtResume=false;
                        return ScanResult.TABLE_FULL;
                    }
                    try {
                        Thread.sleep(100);
                    }
                    catch(InterruptedException e) {
                        e.printStackTrace();
                    }
                }
                if(stop) {
                    return ScanResult.STOPPED;
                }
                File entry=list[i];
                if(entry.isFile()) {
                    currentFile=entry;
                    d.addValue(entry);
                    filesScanned++;
                    progressedSize+=entry.length();
                }
                else if(entry.isDirectory()) {
                    pending.add(entry);
                }
            }
        }
        return ScanResult.COMPLETED;
    }

    /** Asks the progress thread to finish and waits until it has. */
    private void stopUpdater(ProgressUpdate pu) {
        pu.end();
        try {
            pu.join();
        }
        catch(InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Marks the scan as "result table full". {@link Data} calls this together
     * with {@code setPause(true)} once the last table row has been written.
     */
    public void resetAtResume() {
        resetAtResume=true;
    }

    /**
     * Pauses or resumes the scan.
     *
     * @param paused {@code true} to pause, {@code false} to resume
     */
    public void setPause(boolean paused) {
        this.paused=paused;
    }

    /**
     * @return {@code true} while the scan is paused
     */
    public boolean isPaused() {
        return paused;
    }

    /** Requests cancellation; the scan ends before the next entry is processed. */
    public void setStopped() {
        stop=true;
    }

    /**
     * Forwards the current duplicate count to the window.
     *
     * @param value number of duplicates found so far
     */
    public void updateDuplicate(int value) {
        dfd.setDuplicatesFound(value);
    }

    /**
     * Background thread that pushes progress percentage, file count and the
     * current file to the window every 100 ms until ended or the scan is stopped.
     */
    class ProgressUpdate extends Thread {
        volatile boolean end=false;

        @Override
        public void run() {
            while(!stop && !end) {
                dfd.setProgress(percentDone());
                dfd.setFilesScanned(filesScanned);
                dfd.currentlyScanning(currentFile.getPath());
                try {
                    Thread.sleep(100);
                }
                catch(InterruptedException e) {
                    e.printStackTrace();
                }
            }
        }

        /** Bytes processed as a percentage of the total, 0 when the total is unknown or zero. */
        private int percentDone() {
            long total=totalSize;
            if(total<=0) {
                return 0;
            }
            int value=(int)(100.0*progressedSize/total);
            // Files may grow between sizing and scanning; never report above 100.
            return Math.min(value,100);
        }

        /** Asks this thread to finish after its current iteration. */
        public void end() {
            end=true;
        }
    }

    /** Computes the total size in bytes of all regular files below a directory. */
    class DirectorySize {
        /**
         * @param f directory to measure
         * @return summed length of every regular file in the tree; 0 when
         *         {@code f} cannot be listed (for example, it is not a directory)
         */
        public long getDirectorySize(File f) {
            long totalSize=0;
            Deque<File> pending=new ArrayDeque<File>();
            pending.add(f);
            while(!pending.isEmpty()) {
                File[] list=pending.poll().listFiles();
                if(list==null)
                    continue;
                for(int i=0;i<list.length;i++) {
                    if(list[i].isFile()) {
                        totalSize+=list[i].length();
                    }
                    else if(list[i].isDirectory()) {
                        pending.add(list[i]);
                    }
                }
            }
            return totalSize;
        }
    }
}
Data.java
Code:
import java.security.*;
import java.util.*;
import java.io.*;
import java.math.*;
import javax.swing.JTable;
/**
 * Keeps the MD5 hash of every unique file seen so far and records duplicates
 * in the result table.
 *
 * <p>Each file is hashed over its complete raw byte content, so two files are
 * reported as duplicates only when their contents hash to the same value.
 */
public class Data {
    /** Value returned by {@link #checkDuplicate(String)} when the hash is unknown. */
    private static final String NOT_FOUND="Squirrel";
    /** Number of rows available in the result table. */
    private static final int MAX_ROWS=1000;
    /** Size of the read buffer used while hashing. */
    private static final int BUFFER_SIZE=8192;

    /** Maps a file-content hash to the path of the first file that produced it. */
    private HashMap<String,String> hm;
    private JTable data;
    private int duplicates,count;
    private TraverseAndTestFiles tatf;

    /**
     * @param data table that receives one row per duplicate found
     * @param tatf scanner to notify about duplicate counts and a full table
     */
    public Data(JTable data,TraverseAndTestFiles tatf) {
        this.data=data;
        this.tatf=tatf;
        duplicates=0;
        count=0;
        hm=new HashMap<String,String>();
    }

    /**
     * Looks up the first file recorded for a hash.
     *
     * @param str hexadecimal hash string
     * @return path of the first file seen with that hash, or the literal
     *         {@code "Squirrel"} when the hash has not been seen
     */
    public String checkDuplicate(String str) {
        String first=hm.get(str);
        return first==null ? NOT_FOUND : first;
    }

    /**
     * Hashes {@code file} and either records it as a new unique file or writes
     * a duplicate row (serial number, first encounter, current encounter) to
     * the table. When the last table row has been used the scanner is paused
     * and told the table is full, and the row counter restarts at 0.
     *
     * <p>I/O and digest errors are printed and the file is skipped.
     *
     * @param file regular file to examine
     */
    public void addValue(File file) {
        try {
            String path=file.getPath();
            String h=hashOf(file);
            // Look the hash up directly instead of going through the sentinel
            // string, so a stored path can never be mistaken for "not found".
            String dup=hm.get(h);
            if(dup!=null) {
                writeRow(count,dup,path);
                count++;
                duplicates++;
                tatf.updateDuplicate(duplicates);
                if(count==MAX_ROWS) {
                    tatf.setPause(true);
                    tatf.resetAtResume();
                    count=0;
                }
            }
            else {
                hm.put(h, path);
            }
        }
        catch(IOException e) {
            e.printStackTrace();
        }
        catch(NoSuchAlgorithmException e) {
            e.printStackTrace();
        }
    }

    /**
     * Computes the MD5 digest of the file's raw bytes, streaming the content
     * through a fixed-size buffer so large files are not held in memory.
     *
     * @return the digest as a hexadecimal string (leading zeros not padded)
     */
    private static String hashOf(File file) throws IOException,NoSuchAlgorithmException {
        MessageDigest md=MessageDigest.getInstance("MD5");
        InputStream in=new FileInputStream(file);
        try {
            byte[] buffer=new byte[BUFFER_SIZE];
            int read;
            while((read=in.read(buffer))!=-1) {
                md.update(buffer,0,read);
            }
        }
        finally {
            // Always release the file handle, even when reading fails.
            in.close();
        }
        return new BigInteger(1,md.digest()).toString(16);
    }

    /**
     * Writes one duplicate row to the table on the Swing event dispatch thread
     * (this class is driven from the scanner thread).
     */
    private void writeRow(final int row,final String first,final String current) {
        Runnable write=new Runnable() {
            @Override
            public void run() {
                data.setValueAt(String.valueOf(row+1),row,0);
                data.setValueAt(current,row,2);
                data.setValueAt(first,row,1);
            }
        };
        if(javax.swing.SwingUtilities.isEventDispatchThread()) {
            write.run();
        }
        else {
            javax.swing.SwingUtilities.invokeLater(write);
        }
    }

    /** Resets the duplicate counter; the recorded hashes are kept. */
    public void reset() {
        duplicates=0;
    }
}
Regards
Follow me on my YouTube channel if you're into art.