xref: /libpciaccess/src/linux_sysfs.c (revision 9b77a21e)
1 /*
2  * (C) Copyright IBM Corporation 2006
3  * All Rights Reserved.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * on the rights to use, copy, modify, merge, publish, distribute, sub
9  * license, and/or sell copies of the Software, and to permit persons to whom
10  * the Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice (including the next
13  * paragraph) shall be included in all copies or substantial portions of the
14  * Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
19  * IBM AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
22  * DEALINGS IN THE SOFTWARE.
23  */
24 
/**
 * \file linux_sysfs.c
 * Access PCI subsystem using Linux's sysfs interface.  This interface is
 * available starting somewhere in the late 2.5.x kernel phase, and is the
 * preferred method on all 2.6.x kernels.
 *
 * \author Ian Romanick <idr@us.ibm.com>
 */
33 
34 #define _GNU_SOURCE
35 
36 #include <stdlib.h>
37 #include <string.h>
38 #include <stdio.h>
39 #include <unistd.h>
40 #include <sys/types.h>
41 #include <sys/stat.h>
42 #include <fcntl.h>
43 #include <sys/mman.h>
44 #include <dirent.h>
45 #include <errno.h>
46 
47 #ifndef ANDROID
48 #include "config.h"
49 #endif
50 
51 #ifdef HAVE_MTRR
52 #include <asm/mtrr.h>
53 #include <sys/ioctl.h>
54 #endif
55 
56 #include "pciaccess.h"
57 #include "pciaccess_private.h"
58 #include "linux_devmem.h"
59 
60 static const struct pci_system_methods linux_sysfs_methods;
61 
62 #define SYS_BUS_PCI "/sys/bus/pci/devices"
63 
64 static int
65 pci_device_linux_sysfs_read( struct pci_device * dev, void * data,
66 			     pciaddr_t offset, pciaddr_t size,
67 			     pciaddr_t * bytes_read );
68 
69 static int populate_entries(struct pci_system * pci_sys);
70 
71 /**
72  * Attempt to access PCI subsystem using Linux's sysfs interface.
73  */
74 _pci_hidden int
pci_system_linux_sysfs_create(void)75 pci_system_linux_sysfs_create( void )
76 {
77     int err = 0;
78     struct stat st;
79 
80 
81     /* If the directory "/sys/bus/pci/devices" exists, then the PCI subsystem
82      * can be accessed using this interface.
83      */
84 
85     if ( stat( SYS_BUS_PCI, & st ) == 0 ) {
86 	pci_sys = calloc( 1, sizeof( struct pci_system ) );
87 	if ( pci_sys != NULL ) {
88 	    pci_sys->methods = & linux_sysfs_methods;
89 #ifdef HAVE_MTRR
90 	    pci_sys->mtrr_fd = open("/proc/mtrr", O_WRONLY);
91 #endif
92 	    err = populate_entries(pci_sys);
93 	}
94 	else {
95 	    err = ENOMEM;
96 	}
97     }
98     else {
99 	err = errno;
100     }
101 
102     return err;
103 }
104 
105 
/**
 * \c scandir filter that rejects the "." and ".." directory entries.
 *
 * \param d  Directory entry being processed by \c scandir.
 *
 * \return
 * Zero if the entry name is exactly "." or "..", one for any other name.
 *
 * \sa scandir, populate_entries
 */
static int
scan_sys_pci_filter( const struct dirent * d )
{
    const char * const entry = d->d_name;

    if ( strcmp( entry, "." ) == 0 ) {
        return 0;
    }

    if ( strcmp( entry, ".." ) == 0 ) {
        return 0;
    }

    return 1;
}
122 
123 
124 int
populate_entries(struct pci_system * p)125 populate_entries( struct pci_system * p )
126 {
127     struct dirent ** devices = NULL;
128     int n;
129     int i;
130     int err = 0;
131 
132 
133     n = scandir( SYS_BUS_PCI, & devices, scan_sys_pci_filter, alphasort );
134     if ( n > 0 ) {
135 	p->num_devices = n;
136 	p->devices = calloc( n, sizeof( struct pci_device_private ) );
137 
138 	if (p->devices != NULL) {
139 	    for (i = 0 ; i < n ; i++) {
140 		uint8_t config[48];
141 		pciaddr_t bytes;
142 		unsigned dom, bus, dev, func;
143 		struct pci_device_private *device =
144 			(struct pci_device_private *) &p->devices[i];
145 
146 
147 		sscanf(devices[i]->d_name, "%04x:%02x:%02x.%1u",
148 		       & dom, & bus, & dev, & func);
149 
150 		device->base.domain = dom;
151 		device->base.bus = bus;
152 		device->base.dev = dev;
153 		device->base.func = func;
154 
155 
156 		err = pci_device_linux_sysfs_read(& device->base, config, 0,
157 						  48, & bytes);
158 		if ((bytes == 48) && !err) {
159 		    device->base.vendor_id = (uint16_t)config[0]
160 			+ ((uint16_t)config[1] << 8);
161 		    device->base.device_id = (uint16_t)config[2]
162 			+ ((uint16_t)config[3] << 8);
163 		    device->base.device_class = (uint32_t)config[9]
164 			+ ((uint32_t)config[10] << 8)
165 			+ ((uint32_t)config[11] << 16);
166 		    device->base.revision = config[8];
167 		    device->base.subvendor_id = (uint16_t)config[44]
168 			+ ((uint16_t)config[45] << 8);
169 		    device->base.subdevice_id = (uint16_t)config[46]
170 			+ ((uint16_t)config[47] << 8);
171 		}
172 
173 		if (err) {
174 		    break;
175 		}
176 	    }
177 	}
178 	else {
179 	    err = ENOMEM;
180 	}
181     }
182 
183     for (i = 0; i < n; i++)
184 	free(devices[i]);
185     free(devices);
186 
187     if (err) {
188 	free(p->devices);
189 	p->devices = NULL;
190     }
191 
192     return err;
193 }
194 
195 
196 static int
pci_device_linux_sysfs_probe(struct pci_device * dev)197 pci_device_linux_sysfs_probe( struct pci_device * dev )
198 {
199     char     name[256];
200     uint8_t  config[256];
201     char     resource[512];
202     int fd;
203     pciaddr_t bytes;
204     unsigned i;
205     int err;
206 
207 
208     err = pci_device_linux_sysfs_read( dev, config, 0, 256, & bytes );
209     if ( bytes >= 64 ) {
210 	struct pci_device_private *priv = (struct pci_device_private *) dev;
211 
212 	dev->irq = config[60];
213 	priv->header_type = config[14];
214 
215 
216 	/* The PCI config registers can be used to obtain information
217 	 * about the memory and I/O regions for the device.  However,
218 	 * doing so requires some tricky parsing (to correctly handle
219 	 * 64-bit memory regions) and requires writing to the config
220 	 * registers.  Since we'd like to avoid having to deal with the
221 	 * parsing issues and non-root users can write to PCI config
222 	 * registers, we use a different file in the device's sysfs
223 	 * directory called "resource".
224 	 *
225 	 * The resource file contains all of the needed information in
226 	 * a format that is consistent across all platforms.  Each BAR
227 	 * and the expansion ROM have a single line of data containing
228 	 * 3, 64-bit hex values:  the first address in the region,
229 	 * the last address in the region, and the region's flags.
230 	 */
231 	snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/resource",
232 		  SYS_BUS_PCI,
233 		  dev->domain,
234 		  dev->bus,
235 		  dev->dev,
236 		  dev->func );
237 	fd = open( name, O_RDONLY );
238 	if ( fd != -1 ) {
239 	    char * next;
240 	    pciaddr_t  low_addr;
241 	    pciaddr_t  high_addr;
242 	    pciaddr_t  flags;
243 
244 
245 	    bytes = read( fd, resource, 512 );
246 	    resource[511] = '\0';
247 
248 	    close( fd );
249 
250 	    next = resource;
251 	    for ( i = 0 ; i < 6 ; i++ ) {
252 
253 		dev->regions[i].base_addr = strtoull( next, & next, 16 );
254 		high_addr = strtoull( next, & next, 16 );
255 		flags = strtoull( next, & next, 16 );
256 
257 		if ( dev->regions[i].base_addr != 0 ) {
258 		    dev->regions[i].size = (high_addr
259 					    - dev->regions[i].base_addr) + 1;
260 
261 		    dev->regions[i].is_IO = (flags & 0x01);
262 		    dev->regions[i].is_64 = (flags & 0x04);
263 		    dev->regions[i].is_prefetchable = (flags & 0x08);
264 		}
265 	    }
266 
267 	    low_addr = strtoull( next, & next, 16 );
268 	    high_addr = strtoull( next, & next, 16 );
269 	    flags = strtoull( next, & next, 16 );
270 	    if ( low_addr != 0 ) {
271 		priv->rom_base = low_addr;
272 		dev->rom_size = (high_addr - low_addr) + 1;
273 	    }
274 	}
275     }
276 
277     return err;
278 }
279 
280 
281 static int
pci_device_linux_sysfs_read_rom(struct pci_device * dev,void * buffer)282 pci_device_linux_sysfs_read_rom( struct pci_device * dev, void * buffer )
283 {
284     char name[256];
285     int fd;
286     struct stat  st;
287     int err = 0;
288     size_t rom_size;
289     size_t total_bytes;
290 
291 
292     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/rom",
293 	      SYS_BUS_PCI,
294 	      dev->domain,
295 	      dev->bus,
296 	      dev->dev,
297 	      dev->func );
298 
299     fd = open( name, O_RDWR );
300     if ( fd == -1 ) {
301 #ifdef LINUX_ROM
302 	/* If reading the ROM using sysfs fails, fall back to the old
303 	 * /dev/mem based interface.
304 	 * disable this for newer kernels using configure
305 	 */
306 	return pci_device_linux_devmem_read_rom(dev, buffer);
307 #else
308 	return errno;
309 #endif
310     }
311 
312 
313     if ( fstat( fd, & st ) == -1 ) {
314 	close( fd );
315 	return errno;
316     }
317 
318     rom_size = st.st_size;
319     if ( rom_size == 0 )
320 	rom_size = 0x10000;
321 
322     /* This is a quirky thing on Linux.  Even though the ROM and the file
323      * for the ROM in sysfs are read-only, the string "1" must be written to
324      * the file to enable the ROM.  After the data has been read, "0" must be
325      * written to the file to disable the ROM.
326      */
327     write( fd, "1", 1 );
328     lseek( fd, 0, SEEK_SET );
329 
330     for ( total_bytes = 0 ; total_bytes < rom_size ; /* empty */ ) {
331 	const int bytes = read( fd, (char *) buffer + total_bytes,
332 				rom_size - total_bytes );
333 	if ( bytes == -1 ) {
334 	    err = errno;
335 	    break;
336 	}
337 	else if ( bytes == 0 ) {
338 	    break;
339 	}
340 
341 	total_bytes += bytes;
342     }
343 
344 
345     lseek( fd, 0, SEEK_SET );
346     write( fd, "0", 1 );
347 
348     close( fd );
349     return err;
350 }
351 
352 
353 static int
pci_device_linux_sysfs_read(struct pci_device * dev,void * data,pciaddr_t offset,pciaddr_t size,pciaddr_t * bytes_read)354 pci_device_linux_sysfs_read( struct pci_device * dev, void * data,
355 			     pciaddr_t offset, pciaddr_t size,
356 			     pciaddr_t * bytes_read )
357 {
358     char name[256];
359     pciaddr_t temp_size = size;
360     int err = 0;
361     int fd;
362     char *data_bytes = data;
363 
364     if ( bytes_read != NULL ) {
365 	*bytes_read = 0;
366     }
367 
368     /* Each device has a directory under sysfs.  Within that directory there
369      * is a file named "config".  This file used to access the PCI config
370      * space.  It is used here to obtain most of the information about the
371      * device.
372      */
373     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
374 	      SYS_BUS_PCI,
375 	      dev->domain,
376 	      dev->bus,
377 	      dev->dev,
378 	      dev->func );
379 
380     fd = open( name, O_RDONLY );
381     if ( fd == -1 ) {
382 	return errno;
383     }
384 
385 
386     while ( temp_size > 0 ) {
387 	const ssize_t bytes = pread64( fd, data_bytes, temp_size, offset );
388 
389 	/* If zero bytes were read, then we assume it's the end of the
390 	 * config file.
391 	 */
392 	if (bytes == 0)
393 	    break;
394 	if ( bytes < 0 ) {
395 	    err = errno;
396 	    break;
397 	}
398 
399 	temp_size -= bytes;
400 	offset += bytes;
401 	data_bytes += bytes;
402     }
403 
404     if ( bytes_read != NULL ) {
405 	*bytes_read = size - temp_size;
406     }
407 
408     close( fd );
409     return err;
410 }
411 
412 
413 static int
pci_device_linux_sysfs_write(struct pci_device * dev,const void * data,pciaddr_t offset,pciaddr_t size,pciaddr_t * bytes_written)414 pci_device_linux_sysfs_write( struct pci_device * dev, const void * data,
415 			     pciaddr_t offset, pciaddr_t size,
416 			     pciaddr_t * bytes_written )
417 {
418     char name[256];
419     pciaddr_t temp_size = size;
420     int err = 0;
421     int fd;
422     const char *data_bytes = data;
423 
424     if ( bytes_written != NULL ) {
425 	*bytes_written = 0;
426     }
427 
428     /* Each device has a directory under sysfs.  Within that directory there
429      * is a file named "config".  This file used to access the PCI config
430      * space.  It is used here to obtain most of the information about the
431      * device.
432      */
433     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/config",
434 	      SYS_BUS_PCI,
435 	      dev->domain,
436 	      dev->bus,
437 	      dev->dev,
438 	      dev->func );
439 
440     fd = open( name, O_WRONLY );
441     if ( fd == -1 ) {
442 	return errno;
443     }
444 
445 
446     while ( temp_size > 0 ) {
447 	const ssize_t bytes = pwrite64( fd, data_bytes, temp_size, offset );
448 
449 	/* If zero bytes were written, then we assume it's the end of the
450 	 * config file.
451 	 */
452 	if ( bytes == 0 )
453 	    break;
454 	if ( bytes < 0 ) {
455 	    err = errno;
456 	    break;
457 	}
458 
459 	temp_size -= bytes;
460 	offset += bytes;
461 	data_bytes += bytes;
462     }
463 
464     if ( bytes_written != NULL ) {
465 	*bytes_written = size - temp_size;
466     }
467 
468     close( fd );
469     return err;
470 }
471 
472 static int
pci_device_linux_sysfs_map_range_wc(struct pci_device * dev,struct pci_device_mapping * map)473 pci_device_linux_sysfs_map_range_wc(struct pci_device *dev,
474 				    struct pci_device_mapping *map)
475 {
476     char name[256];
477     int fd;
478     const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
479         ? (PROT_READ | PROT_WRITE) : PROT_READ;
480     const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
481         ? O_RDWR : O_RDONLY;
482     const off_t offset = map->base - dev->regions[map->region].base_addr;
483 
484     snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u_wc",
485 	     SYS_BUS_PCI,
486 	     dev->domain,
487 	     dev->bus,
488 	     dev->dev,
489 	     dev->func,
490 	     map->region);
491     fd = open(name, open_flags);
492     if (fd == -1)
493 	    return errno;
494 
495     map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
496     if (map->memory == MAP_FAILED) {
497         map->memory = NULL;
498 	close(fd);
499 	return errno;
500     }
501 
502     close(fd);
503 
504     return 0;
505 }
506 
507 /**
508  * Map a memory region for a device using the Linux sysfs interface.
509  *
510  * \param dev   Device whose memory region is to be mapped.
511  * \param map   Parameters of the mapping that is to be created.
512  *
513  * \return
514  * Zero on success or an \c errno value on failure.
515  *
516  * \sa pci_device_map_rrange, pci_device_linux_sysfs_unmap_range
517  *
518  * \todo
519  * Some older 2.6.x kernels don't implement the resourceN files.  On those
520  * systems /dev/mem must be used.  On these systems it is also possible that
521  * \c mmap64 may need to be used.
522  */
523 static int
pci_device_linux_sysfs_map_range(struct pci_device * dev,struct pci_device_mapping * map)524 pci_device_linux_sysfs_map_range(struct pci_device *dev,
525                                  struct pci_device_mapping *map)
526 {
527     char name[256];
528     int fd;
529     int err = 0;
530     const int prot = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
531         ? (PROT_READ | PROT_WRITE) : PROT_READ;
532     const int open_flags = ((map->flags & PCI_DEV_MAP_FLAG_WRITABLE) != 0)
533         ? O_RDWR : O_RDONLY;
534     const off_t offset = map->base - dev->regions[map->region].base_addr;
535 #ifdef HAVE_MTRR
536     struct mtrr_sentry sentry = {
537 	.base = map->base,
538         .size = map->size,
539 	.type = MTRR_TYPE_UNCACHABLE
540     };
541 #endif
542 
543     /* For WC mappings, try sysfs resourceN_wc file first */
544     if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) &&
545 	!pci_device_linux_sysfs_map_range_wc(dev, map))
546 	    return 0;
547 
548     snprintf(name, 255, "%s/%04x:%02x:%02x.%1u/resource%u",
549              SYS_BUS_PCI,
550              dev->domain,
551              dev->bus,
552              dev->dev,
553              dev->func,
554              map->region);
555 
556     fd = open(name, open_flags);
557     if (fd == -1) {
558         return errno;
559     }
560 
561 
562     map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
563     if (map->memory == MAP_FAILED) {
564         map->memory = NULL;
565 	close(fd);
566 	return errno;
567     }
568 
569 #ifdef HAVE_MTRR
570     if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
571         sentry.type = MTRR_TYPE_WRBACK;
572     } else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
573         sentry.type = MTRR_TYPE_WRCOMB;
574     }
575 
576     if (pci_sys->mtrr_fd != -1 && sentry.type != MTRR_TYPE_UNCACHABLE) {
577 	if (ioctl(pci_sys->mtrr_fd, MTRRIOC_ADD_ENTRY, &sentry) < 0) {
578 	    /* FIXME: Should we report an error in this case?
579 	     */
580 	    fprintf(stderr, "error setting MTRR "
581 		    "(base = 0x%08lx, size = 0x%08x, type = %u) %s (%d)\n",
582 		    sentry.base, sentry.size, sentry.type,
583 		    strerror(errno), errno);
584 /*            err = errno;*/
585 	}
586 	/* KLUDGE ALERT -- rewrite the PTEs to turn off the CD and WT bits */
587 	mprotect (map->memory, map->size, PROT_NONE);
588 	err = mprotect (map->memory, map->size, PROT_READ|PROT_WRITE);
589 
590 	if (err != 0) {
591 	    fprintf(stderr, "mprotect(PROT_READ | PROT_WRITE) failed: %s\n",
592 		    strerror(errno));
593 	    fprintf(stderr, "remapping without mprotect performance kludge.\n");
594 
595 	    munmap(map->memory, map->size);
596 	    map->memory = mmap(NULL, map->size, prot, MAP_SHARED, fd, offset);
597 	    if (map->memory == MAP_FAILED) {
598 		map->memory = NULL;
599 		close(fd);
600 		return errno;
601 	    }
602 	}
603     }
604 #endif
605 
606     close(fd);
607 
608     return 0;
609 }
610 
/**
 * Unmap a memory region for a device using the Linux sysfs interface.
 *
 * The mapping itself is released by \c pci_device_generic_unmap_range.
 * When built with \c HAVE_MTRR, the MTRR entry matching a cachable or
 * write-combining mapping is then deleted; a failure to do so is only
 * logged to \c stderr.
 *
 * \param dev   Device whose memory region is to be unmapped.
 * \param map   Parameters of the mapping that is to be destroyed.
 *
 * \return
 * The value returned by \c pci_device_generic_unmap_range.
 *
 * \sa pci_device_linux_sysfs_map_range
 */
static int
pci_device_linux_sysfs_unmap_range(struct pci_device *dev,
				   struct pci_device_mapping *map)
{
    int status = 0;
#ifdef HAVE_MTRR
    /* Captured before the generic unmap runs, as in the mapping path. */
    struct mtrr_sentry mtrr = {
        .base = map->base,
        .size = map->size,
        .type = MTRR_TYPE_UNCACHABLE
    };
#endif

    status = pci_device_generic_unmap_range(dev, map);
    if (status != 0) {
        return status;
    }

#ifdef HAVE_MTRR
    if ((map->flags & PCI_DEV_MAP_FLAG_CACHABLE) != 0) {
        mtrr.type = MTRR_TYPE_WRBACK;
    }
    else if ((map->flags & PCI_DEV_MAP_FLAG_WRITE_COMBINE) != 0) {
        mtrr.type = MTRR_TYPE_WRCOMB;
    }

    /* Uncachable mappings never had an MTRR entry added for them. */
    if (pci_sys->mtrr_fd != -1 && mtrr.type != MTRR_TYPE_UNCACHABLE) {
        if (ioctl(pci_sys->mtrr_fd, MTRRIOC_DEL_ENTRY, &mtrr) < 0) {
            /* FIXME: Should we report an error in this case? */
            fprintf(stderr, "error setting MTRR "
                    "(base = 0x%08lx, size = 0x%08x, type = %u) %s (%d)\n",
                    mtrr.base, mtrr.size, mtrr.type,
                    strerror(errno), errno);
        }
    }
#endif

    return status;
}
666 
pci_device_linux_sysfs_enable(struct pci_device * dev)667 static void pci_device_linux_sysfs_enable(struct pci_device *dev)
668 {
669     char name[256];
670     int fd;
671 
672     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/enable",
673 	      SYS_BUS_PCI,
674 	      dev->domain,
675 	      dev->bus,
676 	      dev->dev,
677 	      dev->func );
678 
679     fd = open( name, O_RDWR );
680     if (fd == -1)
681        return;
682 
683     write( fd, "1", 1 );
684     close(fd);
685 }
686 
pci_device_linux_sysfs_boot_vga(struct pci_device * dev)687 static int pci_device_linux_sysfs_boot_vga(struct pci_device *dev)
688 {
689     char name[256];
690     char reply[3];
691     int fd, bytes_read;
692     int ret = 0;
693 
694     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/boot_vga",
695 	      SYS_BUS_PCI,
696 	      dev->domain,
697 	      dev->bus,
698 	      dev->dev,
699 	      dev->func );
700 
701     fd = open( name, O_RDONLY );
702     if (fd == -1)
703        return 0;
704 
705     bytes_read = read(fd, reply, 1);
706     if (bytes_read != 1)
707 	goto out;
708     if (reply[0] == '1')
709 	ret = 1;
710 out:
711     close(fd);
712     return ret;
713 }
714 
pci_device_linux_sysfs_has_kernel_driver(struct pci_device * dev)715 static int pci_device_linux_sysfs_has_kernel_driver(struct pci_device *dev)
716 {
717     char name[256];
718     struct stat dummy;
719     int ret;
720 
721     snprintf( name, 255, "%s/%04x:%02x:%02x.%1u/driver",
722 	      SYS_BUS_PCI,
723 	      dev->domain,
724 	      dev->bus,
725 	      dev->dev,
726 	      dev->func );
727 
728     ret = stat(name, &dummy);
729     if (ret < 0)
730 	return 0;
731     return 1;
732 }
733 
734 static struct pci_io_handle *
pci_device_linux_sysfs_open_device_io(struct pci_io_handle * ret,struct pci_device * dev,int bar,pciaddr_t base,pciaddr_t size)735 pci_device_linux_sysfs_open_device_io(struct pci_io_handle *ret,
736 				      struct pci_device *dev, int bar,
737 				      pciaddr_t base, pciaddr_t size)
738 {
739     char name[PATH_MAX];
740 
741     snprintf(name, PATH_MAX, "%s/%04x:%02x:%02x.%1u/resource%d",
742 	     SYS_BUS_PCI, dev->domain, dev->bus, dev->dev, dev->func, bar);
743 
744     ret->fd = open(name, O_RDWR);
745 
746     if (ret->fd < 0)
747 	return NULL;
748 
749     ret->base = base;
750     ret->size = size;
751 
752     return ret;
753 }
754 
755 static struct pci_io_handle *
pci_device_linux_sysfs_open_legacy_io(struct pci_io_handle * ret,struct pci_device * dev,pciaddr_t base,pciaddr_t size)756 pci_device_linux_sysfs_open_legacy_io(struct pci_io_handle *ret,
757 				      struct pci_device *dev, pciaddr_t base,
758 				      pciaddr_t size)
759 {
760     char name[PATH_MAX];
761 
762     /* First check if there's a legacy io method for the device */
763     while (dev) {
764 	snprintf(name, PATH_MAX, "/sys/class/pci_bus/%04x:%02x/legacy_io",
765 		 dev->domain, dev->bus);
766 
767 	ret->fd = open(name, O_RDWR);
768 	if (ret->fd >= 0)
769 	    break;
770 
771 	dev = pci_device_get_parent_bridge(dev);
772     }
773 
774     /* If not, /dev/port is the best we can do */
775     if (!dev)
776 	ret->fd = open("/dev/port", O_RDWR);
777 
778     if (ret->fd < 0)
779 	return NULL;
780 
781     ret->base = base;
782     ret->size = size;
783 
784     return ret;
785 }
786 
787 static void
pci_device_linux_sysfs_close_io(struct pci_device * dev,struct pci_io_handle * handle)788 pci_device_linux_sysfs_close_io(struct pci_device *dev,
789 				struct pci_io_handle *handle)
790 {
791     close(handle->fd);
792 }
793 
794 static uint32_t
pci_device_linux_sysfs_read32(struct pci_io_handle * handle,uint32_t port)795 pci_device_linux_sysfs_read32(struct pci_io_handle *handle, uint32_t port)
796 {
797     uint32_t ret;
798 
799     pread(handle->fd, &ret, 4, port + handle->base);
800 
801     return ret;
802 }
803 
804 static uint16_t
pci_device_linux_sysfs_read16(struct pci_io_handle * handle,uint32_t port)805 pci_device_linux_sysfs_read16(struct pci_io_handle *handle, uint32_t port)
806 {
807     uint16_t ret;
808 
809     pread(handle->fd, &ret, 2, port + handle->base);
810 
811     return ret;
812 }
813 
814 static uint8_t
pci_device_linux_sysfs_read8(struct pci_io_handle * handle,uint32_t port)815 pci_device_linux_sysfs_read8(struct pci_io_handle *handle, uint32_t port)
816 {
817     uint8_t ret;
818 
819     pread(handle->fd, &ret, 1, port + handle->base);
820 
821     return ret;
822 }
823 
824 static void
pci_device_linux_sysfs_write32(struct pci_io_handle * handle,uint32_t port,uint32_t data)825 pci_device_linux_sysfs_write32(struct pci_io_handle *handle, uint32_t port,
826 			       uint32_t data)
827 {
828     pwrite(handle->fd, &data, 4, port + handle->base);
829 }
830 
831 static void
pci_device_linux_sysfs_write16(struct pci_io_handle * handle,uint32_t port,uint16_t data)832 pci_device_linux_sysfs_write16(struct pci_io_handle *handle, uint32_t port,
833 			       uint16_t data)
834 {
835     pwrite(handle->fd, &data, 2, port + handle->base);
836 }
837 
838 static void
pci_device_linux_sysfs_write8(struct pci_io_handle * handle,uint32_t port,uint8_t data)839 pci_device_linux_sysfs_write8(struct pci_io_handle *handle, uint32_t port,
840 			      uint8_t data)
841 {
842     pwrite(handle->fd, &data, 1, port + handle->base);
843 }
844 
845 static int
pci_device_linux_sysfs_map_legacy(struct pci_device * dev,pciaddr_t base,pciaddr_t size,unsigned map_flags,void ** addr)846 pci_device_linux_sysfs_map_legacy(struct pci_device *dev, pciaddr_t base,
847 				  pciaddr_t size, unsigned map_flags, void **addr)
848 {
849     char name[PATH_MAX];
850     int flags = O_RDONLY;
851     int prot = PROT_READ;
852     int fd;
853     int ret=0;
854 
855     if (map_flags & PCI_DEV_MAP_FLAG_WRITABLE) {
856 	flags = O_RDWR; /* O_RDWR != O_WRONLY | O_RDONLY */;
857 	prot |= PROT_WRITE;
858     }
859 
860     /* First check if there's a legacy memory method for the device */
861     while (dev) {
862 	snprintf(name, PATH_MAX, "/sys/class/pci_bus/%04x:%02x/legacy_mem",
863 		 dev->domain, dev->bus);
864 
865 	fd = open(name, flags);
866 	if (fd >= 0)
867 	    break;
868 
869 	dev = pci_device_get_parent_bridge(dev);
870     }
871 
872     /* If not, /dev/mem is the best we can do */
873     if (!dev)
874 	fd = open("/dev/mem", flags);
875 
876     if (fd < 0)
877 	return errno;
878 
879     *addr = mmap(NULL, size, prot, MAP_SHARED, fd, base);
880     if (*addr == MAP_FAILED) {
881 	ret = errno;
882     }
883 
884     close(fd);
885     return ret;
886 }
887 
888 static int
pci_device_linux_sysfs_unmap_legacy(struct pci_device * dev,void * addr,pciaddr_t size)889 pci_device_linux_sysfs_unmap_legacy(struct pci_device *dev, void *addr, pciaddr_t size)
890 {
891     return munmap(addr, size);
892 }
893 
894 
/**
 * Release the backend-specific resources of the global PCI system.
 *
 * When built with \c HAVE_MTRR this closes the MTRR file descriptor if it
 * is not -1; otherwise it does nothing.
 */
static void
pci_system_linux_destroy(void)
{
#ifdef HAVE_MTRR
    const int mtrr_fd = pci_sys->mtrr_fd;

    if (mtrr_fd != -1) {
        close(mtrr_fd);
    }
#endif
}
903 
904 static const struct pci_system_methods linux_sysfs_methods = {
905     .destroy = pci_system_linux_destroy,
906     .destroy_device = NULL,
907     .read_rom = pci_device_linux_sysfs_read_rom,
908     .probe = pci_device_linux_sysfs_probe,
909     .map_range = pci_device_linux_sysfs_map_range,
910     .unmap_range = pci_device_linux_sysfs_unmap_range,
911 
912     .read = pci_device_linux_sysfs_read,
913     .write = pci_device_linux_sysfs_write,
914 
915     .fill_capabilities = pci_fill_capabilities_generic,
916     .enable = pci_device_linux_sysfs_enable,
917     .boot_vga = pci_device_linux_sysfs_boot_vga,
918     .has_kernel_driver = pci_device_linux_sysfs_has_kernel_driver,
919 
920     .open_device_io = pci_device_linux_sysfs_open_device_io,
921     .open_legacy_io = pci_device_linux_sysfs_open_legacy_io,
922     .close_io = pci_device_linux_sysfs_close_io,
923     .read32 = pci_device_linux_sysfs_read32,
924     .read16 = pci_device_linux_sysfs_read16,
925     .read8 = pci_device_linux_sysfs_read8,
926     .write32 = pci_device_linux_sysfs_write32,
927     .write16 = pci_device_linux_sysfs_write16,
928     .write8 = pci_device_linux_sysfs_write8,
929 
930     .map_legacy = pci_device_linux_sysfs_map_legacy,
931     .unmap_legacy = pci_device_linux_sysfs_unmap_legacy,
932 };
933