<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="en">
	<id>https://halfgeek.org/wiki/index.php?action=history&amp;feed=atom&amp;title=Checking_whether_files_are_the_same_%28perl%29</id>
	<title>Checking whether files are the same (perl) - Revision history</title>
	<link rel="self" type="application/atom+xml" href="https://halfgeek.org/wiki/index.php?action=history&amp;feed=atom&amp;title=Checking_whether_files_are_the_same_%28perl%29"/>
	<link rel="alternate" type="text/html" href="https://halfgeek.org/wiki/index.php?title=Checking_whether_files_are_the_same_(perl)&amp;action=history"/>
	<updated>2026-05-28T12:25:54Z</updated>
	<subtitle>Revision history for this page on the wiki</subtitle>
	<generator>MediaWiki 1.34.0</generator>
	<entry>
		<id>https://halfgeek.org/wiki/index.php?title=Checking_whether_files_are_the_same_(perl)&amp;diff=285&amp;oldid=prev</id>
		<title>161.253.47.104 at 06:34, 6 November 2005</title>
		<link rel="alternate" type="text/html" href="https://halfgeek.org/wiki/index.php?title=Checking_whether_files_are_the_same_(perl)&amp;diff=285&amp;oldid=prev"/>
		<updated>2005-11-06T06:34:39Z</updated>

		<summary type="html">&lt;p&gt;&lt;/p&gt;
&lt;p&gt;&lt;b&gt;New page&lt;/b&gt;&lt;/p&gt;&lt;div&gt;A common way to determine whether two files are identical is, of course, to first compare their file sizes, then, if the same size, compare their message digests.&lt;br /&gt;
&lt;br /&gt;
The following program is one I wrote to traverse the result of &amp;lt;code&amp;gt;find .&amp;lt;/code&amp;gt; in an iTunes music directory seeking similarly-named duplicates.  (The duplicate of a given file &amp;#039;&amp;#039;base.ext&amp;#039;&amp;#039; would be &amp;#039;&amp;#039;base 1.ext&amp;#039;&amp;#039;, &amp;#039;&amp;#039;base 2.ext&amp;#039;&amp;#039;, &amp;#039;&amp;#039;etc.&amp;#039;&amp;#039;)  The program is implemented as a set of memory functions.&lt;br /&gt;
&lt;br /&gt;
 &amp;lt;nowiki&amp;gt;&lt;br /&gt;
#! /usr/bin/perl&lt;br /&gt;
&lt;br /&gt;
use warnings;&lt;br /&gt;
use strict;&lt;br /&gt;
&lt;br /&gt;
use Digest::SHA1;&lt;br /&gt;
# Name of the digest class; file_hash calls new on it to obtain a context.&lt;br /&gt;
my $digest = &amp;#039;Digest::SHA1&amp;#039;;&lt;br /&gt;
&lt;br /&gt;
# Memo table keyed by file name. Each value is an array ref: slot 0 holds&lt;br /&gt;
# the file size (preset to -1 when the file does not exist) and slot 1&lt;br /&gt;
# holds the base64 digest. Slots are filled in lazily by the subs below.&lt;br /&gt;
my %tab = ();&lt;br /&gt;
&lt;br /&gt;
# Return the memo record for a file, creating it on first request.&lt;br /&gt;
# A fresh record is an empty array ref when the file exists, or [-1]&lt;br /&gt;
# (size slot preset to -1) when it does not.&lt;br /&gt;
sub file_entry ($) {&lt;br /&gt;
	my $path = shift;&lt;br /&gt;
&lt;br /&gt;
	$tab{$path} = ( -e $path ) ? [] : [-1]&lt;br /&gt;
		unless exists $tab{$path};&lt;br /&gt;
&lt;br /&gt;
	return $tab{$path};&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
# Return the size of a file as reported by the -s file test, cached in&lt;br /&gt;
# slot 0 of its memo record once a defined value has been obtained.&lt;br /&gt;
# A file that was missing when its record was created reports -1.&lt;br /&gt;
sub file_size ($) {&lt;br /&gt;
	my $path = shift;&lt;br /&gt;
	my $rec = file_entry($path);&lt;br /&gt;
&lt;br /&gt;
	$rec-&amp;gt;[0] = -s $path&lt;br /&gt;
		unless defined $rec-&amp;gt;[0];&lt;br /&gt;
&lt;br /&gt;
	return $rec-&amp;gt;[0];&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
# Return the base64 digest of the contents of a file, computed at most&lt;br /&gt;
# once per file and cached in slot 1 of its memo record.&lt;br /&gt;
# Returns undef when the file does not exist or cannot be opened.&lt;br /&gt;
sub file_hash ($) {&lt;br /&gt;
&lt;br /&gt;
	my $file = shift;&lt;br /&gt;
	my $e = file_entry($file);&lt;br /&gt;
&lt;br /&gt;
	unless( defined $e-&amp;gt;[1] ) {&lt;br /&gt;
		return undef if file_size($file) &amp;lt; 0;&lt;br /&gt;
		my $ctx = $digest-&amp;gt;new;&lt;br /&gt;
		my $fp;&lt;br /&gt;
		# Three-argument open: the name is taken literally, so leading or&lt;br /&gt;
		# trailing mode and pipe characters in a file name are not interpreted.&lt;br /&gt;
		open($fp, &amp;#039;&amp;lt;&amp;#039;, $file) or return undef;&lt;br /&gt;
		# Digest the raw bytes, without any newline translation.&lt;br /&gt;
		binmode($fp);&lt;br /&gt;
		# addfile reads the handle to end of file and digests that data.&lt;br /&gt;
		# Passing the glob to add instead would digest only the glob name,&lt;br /&gt;
		# giving every file the same hash regardless of its contents.&lt;br /&gt;
		$ctx-&amp;gt;addfile($fp);&lt;br /&gt;
		$e-&amp;gt;[1] = $ctx-&amp;gt;b64digest;&lt;br /&gt;
		close $fp;&lt;br /&gt;
	}&lt;br /&gt;
&lt;br /&gt;
	return $e-&amp;gt;[1];&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
# Return 1 when both files exist, have the same size and the same digest;&lt;br /&gt;
# return 0 otherwise. Sizes are compared first so that digests are only&lt;br /&gt;
# computed for files that could actually match.&lt;br /&gt;
sub files_equiv ($$) {&lt;br /&gt;
	# Not named a and b: those are the package variables used by sort.&lt;br /&gt;
	my($first,$second) = @_;&lt;br /&gt;
	return 0 if file_size($first) &amp;lt; 0;&lt;br /&gt;
	return 0 if file_size($second) &amp;lt; 0;&lt;br /&gt;
	return 0 if +( file_size($first) != file_size($second) );&lt;br /&gt;
&lt;br /&gt;
	my $first_hash = file_hash($first);&lt;br /&gt;
	my $second_hash = file_hash($second);&lt;br /&gt;
	# file_hash returns undef for a file it could not open. Two undefined&lt;br /&gt;
	# digests must not compare as equal, so treat either one as a mismatch.&lt;br /&gt;
	return 0 unless defined $first_hash and defined $second_hash;&lt;br /&gt;
	return 0 if +( $first_hash ne $second_hash );&lt;br /&gt;
	return 1;&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
# Set of duplicate file names to remove (keys only; the value is always 1).&lt;br /&gt;
my %remove = ();&lt;br /&gt;
&lt;br /&gt;
# Read one path per line from STDIN. For every path that ends in a dot and&lt;br /&gt;
# an alphanumeric extension, probe the numbered copies (base 1.ext through&lt;br /&gt;
# base 256.ext) and record each one that is equivalent to the original.&lt;br /&gt;
while( defined( my $file = &amp;lt;STDIN&amp;gt; ) ) {&lt;br /&gt;
	chomp $file;&lt;br /&gt;
	next if $file eq &amp;#039;.&amp;#039; or $file eq &amp;#039;..&amp;#039;;&lt;br /&gt;
&lt;br /&gt;
	# Split at the last dot; lines without a usable extension are skipped.&lt;br /&gt;
	my($stem,$suffix) = $file =~ /^(.*?)\.([0-9a-zA-Z]+)$/&lt;br /&gt;
		or next;&lt;br /&gt;
&lt;br /&gt;
	print STDERR &amp;quot;Processing: $file\n&amp;quot;;&lt;br /&gt;
	foreach my $n (1 .. 256) {&lt;br /&gt;
		my $alt = &amp;quot;$stem $n.$suffix&amp;quot;;&lt;br /&gt;
		next unless files_equiv($file,$alt);&lt;br /&gt;
		print STDERR &amp;quot;-- Duplicated in $alt\n&amp;quot;;&lt;br /&gt;
		$remove{$alt} = 1;&lt;br /&gt;
	}&lt;br /&gt;
}&lt;br /&gt;
&lt;br /&gt;
print STDERR &amp;quot;Generating commands to remove files\n&amp;quot;;&lt;br /&gt;
&lt;br /&gt;
# Print one rm command per duplicate on STDOUT, in sorted order.&lt;br /&gt;
# Each name is wrapped in single quotes so the shell does not expand dollar&lt;br /&gt;
# signs, backticks, backslashes or double quotes inside it; an embedded&lt;br /&gt;
# single quote is written as close-quote, escaped quote, reopen-quote.&lt;br /&gt;
# The -- stops rm from reading a name with a leading dash as an option.&lt;br /&gt;
foreach my $name ( sort keys %remove ) {&lt;br /&gt;
	( my $quoted = $name ) =~ s/&amp;#039;/&amp;#039;\\&amp;#039;&amp;#039;/g;&lt;br /&gt;
	print qq(rm -- &amp;#039;$quoted&amp;#039;\n);&lt;br /&gt;
}&amp;lt;/nowiki&amp;gt;&lt;/div&gt;</summary>
		<author><name>161.253.47.104</name></author>
		
	</entry>
</feed>