1#!/usr/bin/perl -w
2
3# Based on create-mime.assign.pl in debian lighttpd (1.4.x) package
4# Creates an example mime.conf from /etc/mime.types
5
6use strict;
7
8# future: might use Getopt::Std, but this is simple enough for now
9my $verbose = 0;
10foreach (@ARGV) { $verbose = 1 if ($_ eq "-v"); }
11
12# text/* subtypes to serve as "text/...;charset=utf-8"
13# text/html IS NOT INCLUDED: html has its own method for defining charset
14#   (<meta>), but the standards specify that content-type in HTTP wins over
15#   the setting in the html document.
16# text/markdown doesn't have an official default charset, but requires
17#   one being specified - it seems reasonable to hardcode it to UTF-8
18my %text_utf8 = map { $_ => 1 } qw(
19	css
20	csv
21	markdown
22	plain
23	x-bibtex
24	x-boo
25	x-c++hdr
26	x-c++src
27	x-chdr
28	x-csh
29	x-csrc
30	x-dsrc
31	x-diff
32	x-haskell
33	x-java
34	x-lilypond
35	x-literate-haskell
36	x-makefile
37	x-moc
38	x-pascal
39	x-perl
40	x-python
41	x-scala
42	x-sh
43	x-tcl
44	x-tex
45);
46
47# map extension to hash which maps types to the type they should be replaced with
48my %manual_conflicts_resolve = (
49	'.ra' => {
50		'audio/x-pn-realaudio' => 'audio/x-realaudio',
51	},
52	# use font media types from iana registry
53	'.otf' => {
54		'application/font-sfnt' => 'font/ttf',
55		'font/sfnt' => 'font/ttf',
56		'font/ttf' => 'font/ttf',
57	},
58	'.ttf' => {
59		'application/font-sfnt' => 'font/ttf',
60		'font/otf' => 'font/ttf',
61		'font/sfnt' => 'font/ttf',
62	},
63	'.woff' => {
64		'application/font-woff' => 'font/woff',
65	},
66	# end of fonts
67	'.asn' => {
68		'chemical/x-ncbi-asn1-spec' => 'application/octet-stream',
69	},
70	'.ent' => {
71		'chemical/x-ncbi-asn1-ascii' => 'application/octet-stream',
72	},
73);
74
75open MIMETYPES, "/etc/mime.types" or die "Can't open /etc/mime.types: $!";
76
77my %extensions;
78my %lcext;
79sub set {
80	my ($extension, $mimetype) = @_;
81	$extensions{$extension} = $mimetype;
82	$lcext{lc($extension)} = $extension;
83}
84sub add {
85	my ($extension, $mimetype) = @_;
86	# lighttpd uses case-insensitive extension mapping to mime type.  Still,
87	# preserve case of first ext seen if case-insensitive duplicates exist.
88	my $seen = $lcext{lc($extension)};
89	if (defined($seen) && $seen ne $extension) {
90		# update @_ too for calls to set
91		$_[0] = $extension = $seen;
92	}
93	my $have = $extensions{$extension};
94
95	my $r = $manual_conflicts_resolve{$extension};
96	# update @_ too for calls to set
97	$_[1] = $mimetype = $r->{$mimetype} if $r && $r->{$mimetype};
98
99	# mime.types can have same extension for different mime types
100	if ($have) {
101		# application/octet-stream means we couldn't resolve another conflict
102		return if $have eq $mimetype || $have eq 'application/octet-stream';
103
104		my ($have_type, $have_subtype) = split /\//, $have, 2;
105		my ($type, $subtype) = split /\//, $mimetype, 2;
106
107		my $have_x = ($have_type =~ /^x-/ || $have_subtype =~ /^x-/);
108		my $x = ($type =~ /^x-/ || $subtype =~ /^x-/);
109
110		# entries without x- prefix in type/subtype win:
111		if ($have_x && !$x) {
112			return set @_; # overwrite
113		} elsif ($x && !$have_x) {
114			return; # ignore
115		}
116
117		# text/ wins over application/ for same subtype
118		if ($subtype eq $have_subtype) {
119			if ($type eq "text" && $have_type eq "application") {
120				return set @_; # overwrite
121			} elsif ($have_type eq "text" && $type eq "application") {
122				return; # ignore
123			}
124		}
125
126		# non-vnd.* subtype wins over vnd.* subtype
127		my $have_vnd = ($have_subtype =~ /^vnd\./);
128		my $vnd = ($subtype =~ /^vnd\./);
129		if ($vnd ^ $have_vnd) {
130			if ($have_vnd) {
131				return set @_; # overwrite
132			}
133			else {
134				return; # ignore
135			}
136		}
137
138		if ($verbose && !$vnd) {
139			print STDERR "Duplicate mimetype: '${extension}' => '${mimetype}' (already have '${have}'), merging to 'application/octet-stream'\n"
140		}
141		set ($extension, 'application/octet-stream');
142	} else {
143		set @_;
144	}
145}
146
147sub print_type {
148	my ($extension, $mimetype) = @_;
149	if ($mimetype =~ /^text\/(.*)$/) {
150		$mimetype .= ";charset=utf-8" if $text_utf8{$1};
151	}
152
153	print "\t\"${extension}\" => \"${mimetype}\",\n";
154}
155
156while (<MIMETYPES>) {
157	chomp;
158	s/\#.*//;
159	next if /^\w*$/;
160	if (/^([a-z0-9\/+.-]+)\s+((?:[a-z0-9+.-]+[ ]?)+)$/i) {
161		my $mimetype = $1;
162		my @extensions = split / /, $2;
163
164		foreach my $ext (@extensions) {
165			add(".${ext}", $mimetype);
166		}
167	}
168}
169
170# missing in /etc/mime.types;
171# from http://www.iana.org/assignments/media-types/media-types.xhtml
172add(".dtd", "application/xml-dtd");
173
174# RFC 9239
175add(".js", "text/javascript");
176add(".mjs", "text/javascript");
177
178# other useful mappings
179my %useful = (
180	".tar.gz"  => "application/x-gtar-compressed",
181	".gz"      => "application/x-gzip",
182	".tbz"     => "application/x-gtar-compressed",
183	".tar.bz2" => "application/x-gtar-compressed",
184	".bz2"     => "application/x-bzip",
185	".log"     => "text/plain",
186	".conf"    => "text/plain",
187	".spec"    => "text/plain",
188	"README"   => "text/plain",
189	"Makefile" => "text/x-makefile",
190);
191
192while (my ($ext, $mimetype) = each %useful) {
193	add($ext, $mimetype) unless $extensions{$ext};
194}
195
196
197print <<EOF;
198# created by create-mime.conf.pl
199
200#######################################################################
201##
202##  MimeType handling
203## -------------------
204##
205## https://redmine.lighttpd.net/projects/lighttpd/wiki/Mimetype_assignDetails
206
207##
208## mimetype.xattr-name
209## Set the extended file attribute name used to obtain mime type
210## (must also set mimetype.use-xattr = "enable")
211##
212## Default value is "Content-Type"
213##
214## freedesktop.org Shared MIME-info Database specification suggests
215## user-defined value ("user.mime_type") as name for extended file attribute
216#mimetype.xattr-name = "user.mime_type"
217
218##
219## Use extended attribute named in mimetype.xattr-name (default "Content-Type")
220## to obtain mime type if possible
221##
222## Disabled by default
223##
224#mimetype.use-xattr = "enable"
225
226##
227## mimetype ("Content-Type" HTTP header) mapping for static file handling
228##
229## The first matching suffix is used. If no mapping is found
230## 'application/octet-stream' is used, and caching (etag/last-modified handling)
231## is disabled to prevent clients from caching "unknown" mime types.
232##
233## Therefore the last mapping is:
234##   "" => "application/octet-stream"
235## This matches all extensions and acts as default mime type, and enables
236## caching for those.
237mimetype.assign = (
238EOF
239
240# sort "x-" and "vnd." prefixed names after everything else
241sub mimecmpvalue {
242	my ($mimetype) = @_;
243	$mimetype =~ s/(^|\/)(x-|vnd\.)/~$1$2/g;
244	return $mimetype;
245}
246sub countdots {
247	my ($s) = @_;
248	return scalar(() = $s =~ /\./g);
249}
250# the first matching suffix wins, so we have to sort by "length"
251# as all extensions start with "." we use the number of "."s as length
252# the exceptions are "README" and "Makefile" which are assumed not to conflict
253#  (i.e. are not a suffix of any other extension)
254for my $ext (sort { countdots($b) <=> countdots($a) || mimecmpvalue($extensions{$a}) cmp mimecmpvalue($extensions{$b}) || $a cmp $b } keys(%extensions)) {
255	print_type($ext, $extensions{$ext});
256}
257
258print <<EOF;
259
260	# enable caching for unknown mime types:
261	"" => "application/octet-stream"
262)
263EOF
264