帮酷LOGO
0 0 评论
以下是 Python 模块 BeautifulSoup 中 BeautifulSoup 类的代码示例,展示了如何使用 BeautifulSoup.BeautifulSoup。这些示例均提取自开源 Python 项目。

实例 1


def _set_contents(self, string_or_soup):
        """Store new contents, dispatching on the argument's type.

        A ``BeautifulSoup.BeautifulSoup`` instance is handed to
        ``self._set_soup``; anything else is handed to ``self._set_string``.
        """
        is_soup = isinstance(string_or_soup, BeautifulSoup.BeautifulSoup)
        setter = self._set_soup if is_soup else self._set_string
        setter(string_or_soup)
     

实例 2


def login():
        """Submit the glarab.com login form and report whether access was granted.

        The landing page is fetched first to read the ``__EVENTVALIDATION`` and
        ``__VIEWSTATE`` hidden inputs, which are posted back together with the
        username and password read from ``__settings__``.

        Returns True unless the paid channel listing responds with exactly
        ``'NoAccess'``.
        """
        landing_html = opener.open('http://www.glarab.com/').read()
        page = BeautifulSoup(landing_html)
        event_field = page.find('input',id='__EVENTVALIDATION',type='hidden')
        state_field = page.find('input',id='__VIEWSTATE',type='hidden')

        # Order matters: the tuple must line up with the %s slots in the form body.
        form_values = (
            urllib.quote(event_field['value']),
            urllib.quote(state_field['value']),
            urllib.quote(__settings__.getSetting('password')),
            urllib.quote(__settings__.getSetting('username')),
        )
        post_body = '__EVENTARGUMENT=&__EVENTTARGET=&__EVENTVALIDATION=%s&__VIEWSTATE=%s&pageHeader%%24ScriptManager1=pageHeader%%24UpdatePanel1%%7CpageHeader%%24buttonLogin&pageHeader%%24buttonLogin=%%20&pageHeader%%24txtPassword=%s&pageHeader%%24txtUsername=%s' % form_values
        opener.open('http://www.glarab.com/homepage.aspx', post_body)

        # Probe the registered-users listing; it answers 'NoAccess' when not logged in.
        probe = opener.open('http://www.glarab.com/ajax.aspx?channel=tvlist&type=reg&genre=1')
        return probe.read() != 'NoAccess'
 

实例 3


def getCategories():
        """Add one directory entry per channel category listed by glarab.com.

        When the ``paid_account`` setting is "true", ``login()`` is retried
        (showing a notification and opening the settings each time it fails)
        before the registered-users listing is read; otherwise the free listing
        is read. Each ``<ul>`` child's ``onclick`` attribute is searched for the
        genre id, which is appended to the channel-list URL passed to ``addDir``.

        Fetching and parsing are best-effort: on any error the function returns
        silently, keeping whatever entries were already added.
        """
        if __settings__.getSetting('paid_account') == "true":
                while not login():
                        xbmc.executebuiltin("XBMC.Notification('GLArab','INVALID username and/or password.',30000,"+icon+")")
                        __settings__.openSettings()
                listing_url = 'http://www.glarab.com/ajax.aspx?channel=tvlist&type=reg&genre=1'
                container_id = 'categoryContainer'
                # The parentheses and quotes are literal characters of the
                # onclick handler, so the parentheses must be escaped.
                pattern = re.compile(r"tvChannelsStart\('(.*?)'\);")
        else:
                listing_url = 'http://www.glarab.com/ajax.aspx?channel=tv&type=free&genre=1'
                container_id = 'listContainerTopMenu'
                pattern = re.compile('&genre=(.*?)&')

        try:
                html_data = opener.open(listing_url).read()
                soup = BeautifulSoup(html_data)
                categories = soup.find('ul',id=container_id)
                for li in categories:
                        name = li.contents[0].strip()
                        genre = pattern.search(li['onclick']).groups()[0]
                        dirurl = 'http://www.glarab.com/ajax.aspx?channel=tv&genre=' + genre
                        addDir(name,dirurl,1)
        except Exception:
                # Deliberately best-effort, but no longer swallows
                # KeyboardInterrupt/SystemExit the way a bare except did.
                return
 

实例 4


def getChannels(url):
        """Fetch the channel list at *url* and queue one FetchJob per entry.

        ``&type=reg`` is appended to *url* for paid accounts (after ``login()``
        succeeds) and ``&type=free`` otherwise. Every child of the
        ``listContainerScroll`` div is handed to a ``FetchJob`` together with
        the compiled pattern and a shared connection pool; the function returns
        once the worker pool has been shut down and waited on.
        """
        if __settings__.getSetting('paid_account') == "true":
                while not login():
                        xbmc.executebuiltin("XBMC.Notification('GLArab','INVALID username and/or password.',30000,"+icon+")")
                        __settings__.openSettings()
                url += '&type=reg'
        else:
                url += '&type=free'
        resp = opener.open(url)
        inner_data = resp.read()
        inner_soup = BeautifulSoup(inner_data)
        container = inner_soup.find('div',id='listContainerScroll')
        # The opening parenthesis is a literal character of the JavaScript call
        # and must be escaped; unescaped it makes re.compile() fail with an
        # unbalanced-parenthesis error.
        pattern = re.compile(r"makeHttpRequest\('(.*?)&',")
        NUM_SOCKETS = 5
        NUM_WORKERS = 8
        http = urllib3.PoolManager(maxsize=NUM_SOCKETS)
        workers = workerpool.WorkerPool(size=NUM_WORKERS)
        for span in container:
                workers.put(FetchJob(span, pattern, http))
        workers.shutdown()
        workers.wait()
 

实例 5


def fetch_title(self, url):
        """Return a short human-readable description of what *url* points to.

        * timeout or SSL failure: an explanatory message;
        * non-200 response: a message carrying the status code;
        * HTML/XHTML: the string of the document's ``<title>``;
        * image: "An animation"/"An image" with pixel size and size in KiB;
        * anything else: the content type and size in KiB.
        """
        # Imported here because the SSL handler needs the host name of *url*;
        # the try/except keeps the block usable on both Python 2 and Python 3.
        try:
            from urlparse import urlparse
        except ImportError:
            from urllib.parse import urlparse

        # NOTE(review): the contact address below looks like it was redacted
        # by the page this code was copied from - confirm the real value.
        headers = {
            'User-agent': 'Lampstand IRC Bot (contact [email protected])'}
        try:
            req = requests.get(url, headers=headers, timeout=30)
        except requests.exceptions.Timeout:
            return "That link timed out"
        except requests.exceptions.SSLError as e:
            # Previously referenced an undefined name (urlp), which turned every
            # SSL failure into a NameError instead of this message.
            return "Something's up with the security on %s. Tread carefully. (%s)" % (
                urlparse(url).netloc, e)

        k = len(req.content) / 1024
        if req.status_code != 200:
            return "That link returned an error %s" % (req.status_code)

        content_type = req.headers['content-type']
        if "text/html" in content_type or "application/xhtml+xml" in content_type:
            soup = BeautifulSoup.BeautifulSoup(
                req.text,
                convertEntities=BeautifulSoup.BeautifulSoup.HTML_ENTITIES)
            # NOTE(review): a page without a <title> presumably makes
            # soup.title None here - confirm how callers want that handled.
            return soup.title.string

        if content_type.startswith("image/"):
            image_file = StringIO.StringIO(req.content)
            image_file.seek(0)
            im = Image.open(image_file)
            try:
                # A second frame exists only for animations; seeking to it on a
                # single-frame image is expected to raise (EOFError per the
                # imaging library's documentation).
                im.seek(1)
                kind = "An animation"
            except Exception:
                kind = "An image"
            return "%s, %dx%d (%dk)" % (kind, im.size[0], im.size[1], k)

        return "A %s file (%dk)" % (content_type, k)
 

实例 6


def _zap2it_parse_providers(htmlText):
    """Build a {provider id: provider name} dict from ``<option>`` tags.

    The integer id comes from each option's ``value`` attribute and the name
    from its first child; both are passed through ``convertEntities`` and
    stripped of surrounding whitespace.
    """
    parser = BeautifulSoup.BeautifulSoup()
    parser.feed(htmlText)
    found = {}
    for opt in parser("option"):
        key = int(convertEntities(str(opt["value"])).strip())
        found[key] = convertEntities(str(opt.contents[0])).strip()
    return found
 

实例 7


def version(self, irc, msg, args, optlist, branch, package):
        """[--exact] [{stable,testing,unstable,experimental}] <package name>
        Returns the current version(s) of a Debian package in the given branch
        (if any, otherwise all available ones are displayed).  If --exact is
        specified, only packages whose name exactly matches <package name>
        will be reported.
        """
        # The docstring above is left untouched because it reads as the
        # user-facing command help; implementation notes live in comments.
        #
        # Flow: build the search URL, fetch it, pull (name, html) pairs out
        # with self._deblistre, then parse each html fragment for <li> entries
        # giving a branch name (the <a> text) and its versions (text after
        # each <br>).

        # Parenthesised so the two literals concatenate into one URL template.
        url = ('http://packages.debian.org/cgi-bin/search_packages.pl?keywords'
               '=%s&searchon=names&version=%s&release=all&subword=1')
        for (option, _) in optlist:
            if option == 'exact':
                url = url.replace('&subword=1','')
        responses = []
        if '*' in package:
            irc.error('Wildcard characters can not be specified.', Raise=True)
        package = utils.web.urlquote(package)
        url %= (package, branch)
        try:
            html = utils.web.getUrl(url)
        except utils.web.Error as e:
            # Double-quoted so the apostrophe in "couldn't" needs no escaping.
            irc.error(format("I couldn't reach the search page (%s).", e),
                      Raise=True)
        if 'is down at the moment' in html:
            irc.error('Packages.debian.org is down at the moment.  '
                      'Please try again later.', Raise=True)
        pkgs = self._deblistre.findall(html)
        if not pkgs:
            irc.reply(format('No package found for %s (%s)',
                      utils.web.urlunquote(package), branch))
        else:
            def branchVers(br):
                # Text following each <br>, with any trailing ":suffix" removed.
                vers = [b.next.string.strip() for b in br]
                return [utils.str.rsplit(v, ':', 1)[0] for v in vers]

            for pkg in pkgs:
                pkgMatch = pkg[0]
                soup = BeautifulSoup.BeautifulSoup()
                soup.feed(pkg[1])
                branches = []
                versions = []
                for li in soup.fetch('li'):
                    branches.append(li.a.string)
                    versions.append(branchVers(li.fetch('br')))
                if branches and versions:
                    # Local names chosen so the 'branch' parameter is not
                    # overwritten while results are being formatted.
                    for (branchName, branchVersions) in zip(branches, versions):
                        ver = ', '.join(branchVersions)
                        s = format('%s (%s)', pkgMatch,
                                   ': '.join([branchName, ver]))
                        responses.append(s)
            resp = format('%i matches found: %s',
                          len(responses), '; '.join(responses))
            irc.reply(resp)
     

实例 8


def getEpisodes(self, showID):
        """Yield one metadata dict per episode listed on a show's page.

        The page at ``EPISODE_URL % showID`` is downloaded and every
        ``div#gridshow`` inside ``div#slider1`` becomes a dict holding the
        episode's thumbnail, link, title, date and plot fields.
        """
        f = urllib2.urlopen(EPISODE_URL % (showID))
        try:
            text = f.read()
        finally:
            # Close the handle even when the read fails.
            f.close()

        soup = BeautifulSoup(text)
        slider1 = soup.find('div', id='slider1')
        year = datetime.datetime.now().year
        for mymatch in slider1.findAll('div', id='gridshow'):
            anchor = mymatch.a
            url = anchor['href']
            datestr = mymatch.find('span', id='griddate').string
            the_title = mymatch.find('span', id='gridcaption').string
            description = anchor['title']
            # Swap the thumbnail for the higher resolution preview image.
            img = anchor.img['src'].replace('thumbnail.jpg', 'preview_vp.jpg')
            # NOTE(review): description[0] takes only the first element of the
            # title attribute (a single character if it is a string) - confirm
            # whether the whole value was intended.
            yield {'Channel'      : CHANNEL,
                    'Thumb'       : img,
                    'Fanart_Image': img,
                    'url'         : TV3_URL + url,
                    'Title'       : the_title,
                    'mode'        : MenuConstants.MODE_PLAYVIDEO,
                    'Plot'        : description[0],
                    'plotoutline' : description[0],
                    'Date'        : datestr,
                    'Year'        : year,
                    'Studio'      : CHANNEL
                    }
     

实例 9


def getDOMImplementation(dom=None, **kwds):
    """Return a DOMImplementation wrapping *dom*.

    A fresh, empty ``BeautifulSoup.BeautifulSoup()`` is used whenever *dom* is
    falsy; extra keyword arguments are forwarded unchanged.
    """
    if not dom:
        dom = BeautifulSoup.BeautifulSoup()
    return DOMImplementation(dom, **kwds)
 

实例 10


def wunder(self, irc, msg, args, loc):
            """<US zip code | US/Canada city, state | Foreign city, country>
            Returns the approximate weather conditions for a given city.
            """
            # The docstring above is left untouched because it reads as the
            # user-facing command help; implementation notes live in comments.
            #
            # Flow: fetch the page for loc, bail out through
            # Weather._noLocation() when the search failed, fall back to
            # self._rss() for multi-place result pages, otherwise scrape the
            # border="1" table into a {label: text} dict and build the reply.
            url = '%s%s' % (self._wunderUrl, utils.web.urlquote(loc))
            text = utils.web.getUrl(url, headers=Weather.headers)
            # Parenthesised so the condition can span two lines.
            if ('Search not found' in text or
                    re.search(r'size="2"> Place </font>', text, re.I)):
                Weather._noLocation()
            if 'Place: Temperature' in text:
                m = self._backupUrl.search(text)
                if m is not None:
                    url = 'http://www.wunderground.com' + m.group(1)
                    text = utils.web.getUrl(url, headers=Weather.headers)
                    self._rss(irc, text)
                    return
            severe = ''
            m = self._wunderSevere.search(text)
            if m:
                severe = ircutils.bold(format('  %s', m.group(1)))
            soup = BeautifulSoup.BeautifulSoup()
            soup.feed(text)
            # Get the table with all the weather info
            table = soup.first('table', {'border':'1'})
            if table is BeautifulSoup.Null:
                Weather._noLocation()
            trs = table.fetch('tr')
            try:
                time = trs.pop(0).b.string
            except AttributeError:
                time = ''
            info = {}
            def isText(t):
                # Keep only tags (not bare text nodes) that have children.
                return (not isinstance(t, BeautifulSoup.NavigableText)
                        and t.contents)
            def getText(t):
                s = t.string
                if s is BeautifulSoup.Null:
                    t = t.contents
                    num = t[0].string
                    units = t[1].string
                    # htmlToText strips leading whitespace, so we have to
                    # handle strings with &nbsp; differently.
                    if units.startswith('&nbsp;'):
                        units = utils.web.htmlToText(units)
                        s = ' '.join((num, units))
                    else:
                        units = utils.web.htmlToText(units)
                        s = ' '.join((num, units[0], units[1:]))
                return s
            for tr in trs:
                k = tr.td.string
                v = filter(isText, tr.fetch('td')[1].contents)
                value = map(getText, v)
                info[k] = ' '.join(value)
            location = self._wunderLoc.search(text)
            # .get() so a table without a Temperature row reaches the error
            # branch below instead of raising KeyError.
            temp = info.get('Temperature')
            convert = conf.get(conf.supybot.plugins.Weather.convert,
                               msg.args[0])
            if location and temp:
                (temp, deg, unit) = temp.split()[3:] # We only want temp format
                if convert:
                    temp = Weather._getTemp(int(temp), deg, unit, msg.args[0])
                else:
                    temp = deg.join((temp, unit))
                resp = ['The current temperature in %s is %s (%s).' %
                        (location.group(1), temp, time)]
                resp.append('Conditions: %s.' % info['Conditions'])
                resp.append('Humidity: %s.' % info['Humidity'])
                # Apparently, the "Dew Point" and "Wind" categories are
                # occasionally set to "-" instead of an actual reading. So,
                # we'll just catch the ValueError from trying to unpack a tuple
                # of the wrong size.
                try:
                    (dew, deg, unit) = info['Dew Point'].split()[3:]
                    if convert:
                        dew = Weather._getTemp(int(dew), deg,
                                               unit, msg.args[0])
                    else:
                        dew = deg.join((dew, unit))
                    resp.append('Dew Point: %s.' % dew)
                except (ValueError, KeyError):
                    pass
                try:
                    wind = 'Wind: %s at %s %s.' % tuple(info['Wind'].split())
                    resp.append(wind)
                except (ValueError, TypeError, KeyError):
                    # KeyError added: a missing Wind row is skipped like a
                    # missing Dew Point or Windchill row.
                    pass
                try:
                    (chill, deg, unit) = info['Windchill'].split()[3:]
                    if convert:
                        chill = Weather._getTemp(int(chill), deg,
                                                 unit, msg.args[0])
                    else:
                        # Was assigned to 'dew', so the unconverted windchill
                        # was reported without its degree sign and unit.
                        chill = deg.join((chill, unit))
                    resp.append('Windchill: %s.' % chill)
                except (ValueError, KeyError):
                    pass
                # Optional rows: skipped when absent or empty.
                if info.get('Pressure'):
                    resp.append('Pressure: %s.' % info['Pressure'])
                if info.get('Visibility'):
                    resp.append('Visibility: %s.' % info['Visibility'])
                resp.append(severe)
                resp = map(utils.web.htmlToText, resp)
                irc.reply(' '.join(resp))
            else:
                irc.error('Could not find weather information.')
         

实例 11


def wunder(self, irc, msg, args, loc):
            """<US zip code | US/Canada city, state | Foreign city, country>
            Returns the approximate weather conditions for a given city.
            """
            # The docstring above is left untouched because it reads as the
            # user-facing command help; implementation notes live in comments.
            #
            # Flow: fetch the page for loc, bail out through
            # Weather._noLocation() when the search failed, fall back to
            # self._rss() for multi-place result pages, otherwise scrape the
            # border="1" table (first row: time and location in <b> tags,
            # remaining rows: label/value pairs) and build the reply.
            url = '%s%s' % (self._wunderUrl, utils.web.urlquote(loc))
            text = utils.web.getUrl(url, headers=Weather.headers)
            # Parenthesised so the condition can span two lines.
            if ('Search not found' in text or
                    re.search(r'size="2"> Place </font>', text, re.I)):
                Weather._noLocation()
            if 'Place: Temperature' in text:
                m = self._backupUrl.search(text)
                if m is not None:
                    url = 'http://www.wunderground.com' + m.group(1)
                    text = utils.web.getUrl(url, headers=Weather.headers)
                    self._rss(irc, text)
                    return
            severe = ''
            m = self._wunderSevere.search(text)
            if m:
                severe = ircutils.bold(format('  %s', m.group(1)))
            soup = BeautifulSoup.BeautifulSoup()
            soup.feed(text)
            # Get the table with all the weather info
            table = soup.first('table', {'border':'1'})
            if not table:
                Weather._noLocation()
            trs = table.fetch('tr')
            (time, location) = trs.pop(0).fetch('b')
            time = time.string
            location = location.string
            info = {}
            def isText(t):
                # Keep only tags (not bare text nodes) that have children.
                return (not isinstance(t, BeautifulSoup.NavigableText)
                        and t.contents)
            def getText(t):
                s = t.string
                if s is BeautifulSoup.Null:
                    t = t.contents
                    num = t[0].string
                    units = t[1].string
                    # htmlToText strips leading whitespace, so we have to
                    # handle strings with &nbsp; differently.
                    if units.startswith('&nbsp;'):
                        units = utils.web.htmlToText(units)
                        s = ' '.join((num, units))
                    else:
                        units = utils.web.htmlToText(units)
                        s = ' '.join((num, units[0], units[1:]))
                return s
            for tr in trs:
                k = tr.td.string
                v = filter(isText, tr.fetch('td')[1].contents)
                value = map(getText, v)
                info[k] = ' '.join(value)
            # .get() so a table without a Temperature row reaches the
            # no-location branch below instead of raising KeyError.
            temp = info.get('Temperature')
            convert = conf.get(conf.supybot.plugins.Weather.convert,
                               msg.args[0])
            if location and temp:
                (temp, deg, unit) = temp.split()[3:] # We only want temp format
                if convert:
                    temp = Weather._getTemp(float(temp), deg, unit, msg.args[0])
                else:
                    temp = deg.join((temp, unit))
                resp = ['The current temperature in %s is %s (%s).' %
                        (location, temp, time)]
                resp.append('Conditions: %s.' % info['Conditions'])
                resp.append('Humidity: %s.' % info['Humidity'])
                # Apparently, the "Dew Point" and "Wind" categories are
                # occasionally set to "-" instead of an actual reading. So,
                # we'll just catch the ValueError from trying to unpack a tuple
                # of the wrong size.
                try:
                    (dew, deg, unit) = info['Dew Point'].split()[3:]
                    if convert:
                        dew = Weather._getTemp(float(dew), deg,
                                               unit, msg.args[0])
                    else:
                        dew = deg.join((dew, unit))
                    resp.append('Dew Point: %s.' % dew)
                except (ValueError, KeyError):
                    pass
                try:
                    wind = 'Wind: %s at %s %s.' % tuple(info['Wind'].split())
                    resp.append(wind)
                except (ValueError, TypeError, KeyError):
                    # KeyError added: a missing Wind row is skipped like a
                    # missing Dew Point or Windchill row.
                    pass
                try:
                    (chill, deg, unit) = info['Windchill'].split()[3:]
                    if convert:
                        chill = Weather._getTemp(float(chill), deg,
                                                 unit, msg.args[0])
                    else:
                        # Was assigned to 'dew', so the unconverted windchill
                        # was reported without its degree sign and unit.
                        chill = deg.join((chill, unit))
                    resp.append('Windchill: %s.' % chill)
                except (ValueError, KeyError):
                    pass
                # Optional row: skipped when absent or empty.
                if info.get('Pressure'):
                    resp.append('Pressure: %s.' % info['Pressure'])
                resp.append(severe)
                resp = map(utils.web.htmlToText, resp)
                irc.reply(' '.join(resp))
            else:
                Weather._noLocation()
         



Copyright © 2011 HelpLib All rights reserved.    知识分享协议 京ICP备05059198号-3  |  如果智培  |  酷兔英语