xref: /libpciaccess/src/linux_sysfs.c (revision db56c640)
1 /*
2  * (C) Copyright IBM Corporation 2006
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
19  * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
25 /**
26  * \file linux_sysfs.c
27  * Access PCI subsystem using Linux's sysfs interface.  This interface is
28  * available starting somewhere in the late 2.5.x kernel phase, and is the
29  * preferred method on all 2.6.x kernels.
30  *
31  * \author Ian Romanick <idr@us.ibm.com>
32  */
33 
34 #define _GNU_SOURCE
35 
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39 #include <unistd.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <sys/mman.h>
44 #include <dirent.h>
45 #include <errno.h>
46 
47 #include "pciaccess.h"
48 #include "pciaccess_private.h"
49 
50 static int pci_device_linux_sysfs_read_rom( struct pci_device * dev,
51     void * buffer );
52 
53 static int pci_device_linux_sysfs_probe( struct pci_device * dev );
54 
55 static int pci_device_linux_sysfs_map_region( struct pci_device * dev,
56     unsigned region, int write_enable );
57 
58 static int pci_device_linux_sysfs_unmap_region( struct pci_device * dev,
59     unsigned region );
60 
61 static int pci_device_linux_sysfs_read( struct pci_device * dev, void * data,
62     pciaddr_t offset, pciaddr_t size, pciaddr_t * bytes_read );
63 
64 static int pci_device_linux_sysfs_write( struct pci_device * dev,
65     const void * data, pciaddr_t offset, pciaddr_t size,
66     pciaddr_t * bytes_wrtten );
67 
68 static const struct pci_system_methods linux_sysfs_methods = {
69     .destroy = NULL,
70     .destroy_device = NULL,
71     .read_rom = pci_device_linux_sysfs_read_rom,
72     .probe = pci_device_linux_sysfs_probe,
73     .map = pci_device_linux_sysfs_map_region,
74     .unmap = pci_device_linux_sysfs_unmap_region,
75 
76     .read = pci_device_linux_sysfs_read,
77     .write = pci_device_linux_sysfs_write,
78 
79     .fill_capabilities = pci_fill_capabilities_generic
80 };
81 
/** sysfs directory whose entries are scanned to enumerate PCI devices. */
#define SYS_BUS_PCI "/sys/bus/pci/devices"


/* Defined later in this file; builds the device table of a pci_system. */
static int populate_entries( struct pci_system * p );
86 
87 
88 /**
89  * Attempt to access PCI subsystem using Linux's sysfs interface.
90  */
91 int
92 pci_system_linux_sysfs_create( void )
93 {
94     int err = 0;
95     struct stat st;
96 
97 
98     /* If the directory "/sys/bus/pci/devices" exists, then the PCI subsystem
99      * can be accessed using this interface.
100      */
101 
102     if ( stat( SYS_BUS_PCI, & st ) == 0 ) {
103 	pci_sys = calloc( 1, sizeof( struct pci_system ) );
104 	if ( pci_sys != NULL ) {
105 	    pci_sys->methods = & linux_sysfs_methods;
106 	    err = populate_entries(pci_sys);
107 	}
108 	else {
109 	    err = ENOMEM;
110 	}
111     }
112     else {
113 	err = errno;
114     }
115 
116     return err;
117 }
118 
119 
/**
 * Selection callback for \c scandir that rejects the "." and ".." entries.
 *
 * \param d  Directory entry currently being examined by \c scandir.
 *
 * \return
 * Zero when the entry is named "." or "..", one for every other name.
 *
 * \sa scandir, populate_entries
 */
static int
scan_sys_pci_filter( const struct dirent * d )
{
    const char * const entry_name = d->d_name;

    if ( strcmp( entry_name, "." ) == 0 ) {
        return 0;
    }

    if ( strcmp( entry_name, ".." ) == 0 ) {
        return 0;
    }

    return 1;
}
136 
137 
138 int
139 populate_entries( struct pci_system * p )
140 {
141     struct dirent ** devices;
142     int n;
143     int i;
144     int err;
145 
146 
147     n = scandir( SYS_BUS_PCI, & devices, scan_sys_pci_filter, alphasort );
148     if ( n > 0 ) {
149 	p->num_devices = n;
150 	p->devices = calloc( n, sizeof( struct pci_device_private ) );
151 
152 	if (p->devices != NULL) {
153 	    for (i = 0 ; i < n ; i++) {
154 		uint8_t config[48];
155 		pciaddr_t bytes;
156 		unsigned dom, bus, dev, func;
157 		struct pci_device_private *device =
158 			(struct pci_device_private *) &p->devices[i];
159 
160 
161 		sscanf(devices[i]->d_name, "%04x:%02x:%02x.%1u",
162 		       & dom, & bus, & dev, & func);
163 
164 		device->base.domain = dom;
165 		device->base.bus = bus;
166 		device->base.dev = dev;
167 		device->base.func = func;
168 
169 
170 		err = pci_device_linux_sysfs_read(& device->base, config, 0,
171 						  48, & bytes);
172 		if ((bytes == 48) && !err) {
173 		    device->base.vendor_id = (uint16_t)config[0]
174 			+ ((uint16_t)config[1] << 8);
175 		    device->base.device_id = (uint16_t)config[2]
176 			+ ((uint16_t)config[3] << 8);
177 		    device->base.device_class = (uint32_t)config[9]
178 			+ ((uint32_t)config[10] << 8)
179 			+ ((uint32_t)config[11] << 16);
180 		    device->base.revision = config[8];
181 		    device->base.subvendor_id = (uint16_t)config[44]
182 			+ ((uint16_t)config[45] << 8);
183 		    device->base.subdevice_id = (uint16_t)config[46]
184 			+ ((uint16_t)config[47] << 8);
185 		}
186 
187 		if (err) {
188 		    break;
189 		}
190 	    }
191 	}
192 	else {
193 	    err = ENOMEM;
194 	}
195     }
196 
197     if (err) {
198 	free(p->devices);
199 	p->devices = NULL;
200     }
201 
202     return err;
203 }
204 
205 
206 static int
207 pci_device_linux_sysfs_probe( struct pci_device * dev )
208 {
209     char     name[256];
210     uint8_t  config[256];
211     char     resource[512];
212     int fd;
213     pciaddr_t bytes;
214     unsigned i;
215     int err;
216 
217 
218     err = pci_device_linux_sysfs_read( dev, config, 0, 256, & bytes );
219     if ( bytes >= 64 ) {
220 	struct pci_device_private *priv = (struct pci_device_private *) dev;
221 
222 	dev->irq = config[60];
223 	priv->header_type = config[14];
224 
225 
226 	/* The PCI config registers can be used to obtain information
227 	 * about the memory and I/O regions for the device.  However,
228 	 * doing so requires some tricky parsing (to correctly handle
229 	 * 64-bit memory regions) and requires writing to the config
230 	 * registers.  Since we'd like to avoid having to deal with the
231 	 * parsing issues and non-root users can write to PCI config
232 	 * registers, we use a different file in the device's sysfs
233 	 * directory called "resource".
234 	 *
235 	 * The resource file contains all of the needed information in
236 	 * a format that is consistent across all platforms.  Each BAR
237 	 * and the expansion ROM have a single line of data containing
238 	 * 3, 64-bit hex values:  the first address in the region,
239 	 * the last address in the region, and the region's flags.
240 	 */
241 	snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/resource",
242 		  SYS_BUS_PCI,
243 		  dev->domain,
244 		  dev->bus,
245 		  dev->dev,
246 		  dev->func );
247 	fd = open( name, O_RDONLY );
248 	if ( fd != -1 ) {
249 	    char * next;
250 	    pciaddr_t  low_addr;
251 	    pciaddr_t  high_addr;
252 	    pciaddr_t  flags;
253 
254 
255 	    bytes = read( fd, resource, 512 );
256 	    resource[511] = '\0';
257 
258 	    close( fd );
259 
260 	    next = resource;
261 	    for ( i = 0 ; i < 6 ; i++ ) {
262 
263 		dev->regions[i].base_addr = strtoull( next, & next, 16 );
264 		high_addr = strtoull( next, & next, 16 );
265 		flags = strtoull( next, & next, 16 );
266 
267 		if ( dev->regions[i].base_addr != 0 ) {
268 		    dev->regions[i].size = (high_addr
269 					    - dev->regions[i].base_addr) + 1;
270 
271 		    dev->regions[i].is_IO = (flags & 0x01);
272 		    dev->regions[i].is_64 = (flags & 0x04);
273 		    dev->regions[i].is_prefetchable = (flags & 0x08);
274 		}
275 	    }
276 
277 	    low_addr = strtoull( next, & next, 16 );
278 	    high_addr = strtoull( next, & next, 16 );
279 	    flags = strtoull( next, & next, 16 );
280 	    if ( low_addr != 0 ) {
281 		dev->rom_size = (high_addr - low_addr) + 1;
282 	    }
283 	}
284     }
285 
286     return err;
287 }
288 
289 
290 static int
291 pci_device_linux_sysfs_read_rom( struct pci_device * dev, void * buffer )
292 {
293     char name[256];
294     int fd;
295     struct stat  st;
296     int err = 0;
297     size_t total_bytes;
298 
299 
300     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/rom",
301 	      SYS_BUS_PCI,
302 	      dev->domain,
303 	      dev->bus,
304 	      dev->dev,
305 	      dev->func );
306 
307     fd = open( name, O_RDWR );
308     if ( fd == -1 ) {
309 	return errno;
310     }
311 
312 
313     if ( fstat( fd, & st ) == -1 ) {
314 	close( fd );
315 	return errno;
316     }
317 
318 
319     /* This is a quirky thing on Linux.  Even though the ROM and the file
320      * for the ROM in sysfs are read-only, the string "1" must be written to
321      * the file to enable the ROM.  After the data has been read, "0" must be
322      * written to the file to disable the ROM.
323      */
324     write( fd, "1", 1 );
325     lseek( fd, 0, SEEK_SET );
326 
327     for ( total_bytes = 0 ; total_bytes < st.st_size ; /* empty */ ) {
328 	const int bytes = read( fd, (char *) buffer + total_bytes,
329 				st.st_size - total_bytes );
330 	if ( bytes == -1 ) {
331 	    err = errno;
332 	    break;
333 	}
334 	else if ( bytes == 0 ) {
335 	    break;
336 	}
337 
338 	total_bytes += bytes;
339     }
340 
341 
342     lseek( fd, 0, SEEK_SET );
343     write( fd, "0", 1 );
344 
345     close( fd );
346     return err;
347 }
348 
349 
350 static int
351 pci_device_linux_sysfs_read( struct pci_device * dev, void * data,
352 			     pciaddr_t offset, pciaddr_t size,
353 			     pciaddr_t * bytes_read )
354 {
355     char name[256];
356     pciaddr_t temp_size = size;
357     int err = 0;
358     int fd;
359 
360 
361     if ( bytes_read != NULL ) {
362 	*bytes_read = 0;
363     }
364 
365     /* Each device has a directory under sysfs.  Within that directory there
366      * is a file named "config".  This file used to access the PCI config
367      * space.  It is used here to obtain most of the information about the
368      * device.
369      */
370     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
371 	      SYS_BUS_PCI,
372 	      dev->domain,
373 	      dev->bus,
374 	      dev->dev,
375 	      dev->func );
376 
377     fd = open( name, O_RDONLY );
378     if ( fd == -1 ) {
379 	return errno;
380     }
381 
382 
383     while ( temp_size > 0 ) {
384 	const ssize_t bytes = pread64( fd, data, temp_size, offset );
385 
386 	/* If zero bytes were read, then we assume it's the end of the
387 	 * config file.
388 	 */
389 	if ( bytes <= 0 ) {
390 	    err = errno;
391 	    break;
392 	}
393 
394 	temp_size -= bytes;
395 	offset += bytes;
396 	data += bytes;
397     }
398 
399     if ( bytes_read != NULL ) {
400 	*bytes_read = size - temp_size;
401     }
402 
403     close( fd );
404     return err;
405 }
406 
407 
408 static int
409 pci_device_linux_sysfs_write( struct pci_device * dev, const void * data,
410 			     pciaddr_t offset, pciaddr_t size,
411 			     pciaddr_t * bytes_written )
412 {
413     char name[256];
414     pciaddr_t temp_size = size;
415     int err = 0;
416     int fd;
417 
418 
419     if ( bytes_written != NULL ) {
420 	*bytes_written = 0;
421     }
422 
423     /* Each device has a directory under sysfs.  Within that directory there
424      * is a file named "config".  This file used to access the PCI config
425      * space.  It is used here to obtain most of the information about the
426      * device.
427      */
428     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
429 	      SYS_BUS_PCI,
430 	      dev->domain,
431 	      dev->bus,
432 	      dev->dev,
433 	      dev->func );
434 
435     fd = open( name, O_WRONLY );
436     if ( fd == -1 ) {
437 	return errno;
438     }
439 
440 
441     while ( temp_size > 0 ) {
442 	const ssize_t bytes = pwrite64( fd, data, temp_size, offset );
443 
444 	/* If zero bytes were written, then we assume it's the end of the
445 	 * config file.
446 	 */
447 	if ( bytes <= 0 ) {
448 	    err = errno;
449 	    break;
450 	}
451 
452 	temp_size -= bytes;
453 	offset += bytes;
454 	data += bytes;
455     }
456 
457     if ( bytes_written != NULL ) {
458 	*bytes_written = size - temp_size;
459     }
460 
461     close( fd );
462     return err;
463 }
464 
465 
466 /**
467  * Map a memory region for a device using the Linux sysfs interface.
468  *
469  * \param dev          Device whose memory region is to be mapped.
470  * \param region       Region, on the range [0, 5], that is to be mapped.
471  * \param write_enable Map for writing (non-zero).
472  *
473  * \return
474  * Zero on success or an \c errno value on failure.
475  *
476  * \sa pci_device_map_region, pci_device_linux_sysfs_unmap_region
477  *
478  * \todo
479  * Some older 2.6.x kernels don't implement the resourceN files.  On those
480  * systems /dev/mem must be used.  On these systems it is also possible that
481  * \c mmap64 may need to be used.
482  */
483 static int
484 pci_device_linux_sysfs_map_region( struct pci_device * dev, unsigned region,
485 				   int write_enable )
486 {
487     char name[256];
488     int fd;
489     int err = 0;
490     const int prot = (write_enable) ? (PROT_READ | PROT_WRITE) : PROT_READ;
491 
492 
493     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/resource%u",
494 	      SYS_BUS_PCI,
495 	      dev->domain,
496 	      dev->bus,
497 	      dev->dev,
498 	      dev->func,
499 	      region );
500 
501     fd = open( name, (write_enable) ? O_RDWR : O_RDONLY );
502     if ( fd == -1 ) {
503 	return errno;
504     }
505 
506 
507     dev->regions[ region ].memory = mmap( NULL, dev->regions[ region ].size,
508 					  prot, MAP_SHARED, fd, 0 );
509     if ( dev->regions[ region ].memory == MAP_FAILED ) {
510 	err = errno;
511 	dev->regions[ region ].memory = NULL;
512     }
513 
514     close( fd );
515     return err;
516 }
517 
518 
519 /**
520  * Unmap the specified region using the Linux sysfs interface.
521  *
522  * \param dev          Device whose memory region is to be mapped.
523  * \param region       Region, on the range [0, 5], that is to be mapped.
524  *
525  * \return
526  * Zero on success or an \c errno value on failure.
527  *
528  * \sa pci_device_unmap_region, pci_device_linux_sysfs_map_region
529  *
530  * \todo
531  * Some older 2.6.x kernels don't implement the resourceN files.  On those
532  * systems /dev/mem must be used.  On these systems it is also possible that
533  * \c mmap64 may need to be used.
534  */
535 static int
536 pci_device_linux_sysfs_unmap_region( struct pci_device * dev, unsigned region )
537 {
538     int err = 0;
539 
540     if ( munmap( dev->regions[ region ].memory, dev->regions[ region ].size )
541 	 == -1 ) {
542 	err = errno;
543     }
544 
545     dev->regions[ region ].memory = NULL;
546 
547     return err;
548 }
549