Source code for wwt_data_formats.folder

# -*- mode: python; coding: utf-8 -*-
# Copyright 2019-2021 the .NET Foundation
# Licensed under the MIT License.

from __future__ import absolute_import, division, print_function

# Names exported by a star-import of this module.
__all__ = [
    "Folder",
    "fetch_folder_tree",
    "make_absolutizing_url_mutator",
    "make_filesystem_url_mutator",
    "walk_cached_folder_tree",
]

import os.path
import re
import requests
from traitlets import Bool, Instance, Int, List, Unicode, Union, UseEnum
from xml.etree import ElementTree as etree

from . import LockedXmlTraits, XmlSer
from .abcs import UrlContainer
from .enums import FolderType


class Folder(LockedXmlTraits, UrlContainer):
    """A grouping of WWT content assets.

    Children can be: places (aka "Items"), imagesets, linesets, tours, folders,
    or IThumbnail objects (to be explored).

    """

    name = Unicode("").tag(xml=XmlSer.attr("Name"))
    group = Unicode("Explorer").tag(xml=XmlSer.attr("Group"))
    url = Unicode("").tag(xml=XmlSer.attr("Url"))
    """The URL at which the full contents of this folder can be downloaded in WTML
    format.

    """
    thumbnail = Unicode("").tag(xml=XmlSer.attr("Thumbnail"))
    browseable = Bool(True).tag(xml=XmlSer.attr("Browseable"))
    searchable = Bool(True).tag(xml=XmlSer.attr("Searchable"))
    type = UseEnum(FolderType, default_value=FolderType.UNSPECIFIED).tag(
        xml=XmlSer.attr("Type")
    )
    sub_type = Unicode("").tag(xml=XmlSer.attr("SubType"))
    msr_community_id = Int(0).tag(
        xml=XmlSer.attr("MSRCommunityId"), xml_omit_zero=True
    )
    """The ID number of the WWT Community that this content came from."""
    msr_component_id = Int(0).tag(
        xml=XmlSer.attr("MSRComponentId"), xml_omit_zero=True
    )
    """The ID number of this content item on the WWT Communities system."""
    permission = Int(0).tag(xml=XmlSer.attr("Permission"), xml_omit_zero=True)
    "TBD."

    children = List(
        trait=Union(
            [
                Instance("wwt_data_formats.folder.Folder", args=()),
                Instance("wwt_data_formats.place.Place", args=()),
                Instance("wwt_data_formats.imageset.ImageSet", args=()),
            ]
        ),
        default_value=(),
    ).tag(xml=XmlSer.inner_list())

    def _tag_name(self):
        """Return the tag name used for this object, ``"Folder"``."""
        return "Folder"

    def walk(self, download=False):
        """Generate this folder and everything beneath it, depth-first.

        Parameters
        ----------
        download : optional boolean, defaults to False
            If true, a child folder that has no children of its own but does
            have a :attr:`url` is loaded with ``Folder.from_url``. The loaded
            folder takes the place of the stub in :attr:`children`, and its
            ``url`` is set to the URL it was loaded from.

        Returns
        -------
        A generator of tuples of ``(depth, path, item)``.

        Notes
        -----
        The first tuple is ``(0, (), self)``. For every other item, ``depth``
        is the number of folder levels between this folder and the item, and
        ``path`` is a tuple of the child indices leading from this folder to
        the item. A child folder is generated before its own contents.

        """
        yield (0, (), self)

        for position, item in enumerate(self.children):
            if not isinstance(item, Folder):
                yield (1, (position,), item)
                continue

            is_stub = len(item.children) == 0 and bool(item.url)

            if is_stub and download:
                source_url = item.url
                item = Folder.from_url(source_url)
                item.url = source_url
                self.children[position] = item

            for sub_depth, sub_path, descendant in item.walk(download=download):
                yield (sub_depth + 1, (position,) + sub_path, descendant)

    def mutate_urls(self, mutator):
        """Rewrite the URLs held by this folder and by all of its children.

        Parameters
        ----------
        mutator : callable
            Called with each non-empty URL string; its return value replaces
            the original. It is applied to :attr:`url` and :attr:`thumbnail`,
            and is then handed to the ``mutate_urls`` method of every child.

        """
        for attr_name in ("url", "thumbnail"):
            current = getattr(self, attr_name)
            if current:
                setattr(self, attr_name, mutator(current))

        for item in self.children:
            item.mutate_urls(mutator)

    def immediate_imagesets(self):
        """
        Generate a sequence of the imagesets defined in this folder, without
        recursion into any child folders.

        Returns
        -------
        A generator of tuples of ``(child_index, item_type, imageset)``,
        described below.

        Notes
        -----
        In the generated tuples, ``child_index`` is the index number of the
        item within the folder's :attr:`~Folder.children` array and
        ``imageset`` is the :class:`~wwt_data_formats.imageset.ImageSet`
        object contained within the folder.

        If ``item_type`` is ``None``, the imageset is itself a direct child of
        the folder. Otherwise it is a string naming where the imageset was
        found inside a :class:`~wwt_data_formats.place.Place` child:
        ``"place_imageset"``, ``"place_foreground"``, or
        ``"place_background"``.

        Examples
        --------
        Consider a folder that has two children: an imageset, and a place. The
        place in turn defines both a
        :attr:`~wwt_data_formats.place.Place.foreground_image_set` and a
        :attr:`~wwt_data_formats.place.Place.background_image_set`. The
        generator returned by this function will yield three values: ``(0,
        None, <ImageSet>)``, ``(1, "place_foreground", <ImageSet>)``, and
        ``(1, "place_background", <ImageSet>)``.

        """
        from .imageset import ImageSet
        from .place import Place

        # (item_type label, Place attribute), in the order they are reported.
        place_slots = (
            ("place_imageset", "image_set"),
            ("place_foreground", "foreground_image_set"),
            ("place_background", "background_image_set"),
        )

        for position, item in enumerate(self.children):
            if isinstance(item, ImageSet):
                yield (position, None, item)
            elif isinstance(item, Place):
                for label, attr_name in place_slots:
                    found = getattr(item, attr_name)
                    if found is not None:
                        yield (position, label, found)
def make_absolutizing_url_mutator(baseurl):
    """Return a function that makes relative URLs absolute.

    Parameters
    ----------
    baseurl : string, absolute URL
        The absolute URL with which to combine relative URLs

    Returns
    -------
    A mutator function suitable for use with
    :meth:`wwt_data_formats.abcs.UrlContainer.mutate_urls`.

    Notes
    -----
    The returned mutator resolves a relative URL against *baseurl*. An input
    that is empty, or that already names a network location, is handed back
    unchanged.

    """
    from urllib.parse import urljoin, urlsplit

    def mutator(url):
        # Only non-empty URLs without a network location are relative.
        if url and not urlsplit(url).netloc:
            return urljoin(baseurl, url)

        return url

    return mutator
def make_filesystem_url_mutator(basedir):
    """Return a function that converts relative URLs to filesystem paths.

    Parameters
    ----------
    basedir : string, path
        An absolute path that the relative URLs will be combined with.

    Returns
    -------
    A mutator function suitable for use with
    :meth:`wwt_data_formats.abcs.UrlContainer.mutate_urls`.

    Notes
    -----
    The returned mutator takes the path portion of a relative URL, splits it
    on ``/``, percent-decodes each segment, and joins the segments onto
    *basedir*. An input that is empty, or that names a network location, is
    handed back unchanged.

    """
    from urllib.parse import unquote, urlsplit

    def mutator(url):
        if not url:
            return url

        parts = urlsplit(url)

        if parts.netloc:
            return url  # this URL is absolute

        # TODO: this should work with '..' but pretty much only by luck
        segments = [unquote(segment) for segment in parts.path.split("/")]
        return os.path.join(basedir, *segments)

    return mutator
def _sanitize_name(name):
    """Reduce *name* to letters, digits, hyphens, and underscores.

    Every run of other characters is replaced by a single underscore, and
    underscores at the very start or end of the result are then removed.

    """
    collapsed = re.sub(r"[^-_a-zA-Z0-9]+", "_", name)
    return collapsed.strip("_")
def fetch_folder_tree(root_url, root_cache_path, on_fetch=None):
    """Download a tree of WTML folders into a directory cache.

    Parameters
    ----------
    root_url : string, URL
        The URL of the WTML document at the top of the tree.
    root_cache_path : string, path
        The directory in which to store the downloaded documents. It is
        created if it does not exist yet.
    on_fetch : optional callable
        If given, called with each URL just before that URL is requested.

    Notes
    -----
    The text of the root document is written to ``index.wtml`` inside
    *root_cache_path*. Each child folder is assigned a subdirectory named
    after its zero-padded child index and a sanitized form of its name. A
    child folder that has no children but does have a URL is downloaded and
    its text is written to ``index.wtml`` in that subdirectory, after which
    its contents are processed the same way.

    Any given URL is downloaded at most once. A later folder that refers to
    an already-downloaded URL is skipped.

    """
    done_urls = set()

    def get_folder(url):
        # Returns (text, folder), or (None, None) for a URL already fetched.
        if url in done_urls:
            return None, None

        if on_fetch is not None:
            on_fetch(url)

        resp = requests.get(url)
        resp.encoding = "utf-8-sig"  # see LockedXmlTraits.from_url()
        elem = etree.fromstring(resp.text)
        done_urls.add(url)
        return resp.text, Folder.from_xml(elem)

    root_text, root_folder = get_folder(root_url)

    # Child directories are created on demand below; do the same for the root
    # so that a fresh cache location works. An empty path means the current
    # directory, which `os.makedirs` would reject.
    if root_cache_path:
        os.makedirs(root_cache_path, exist_ok=True)

    with open(os.path.join(root_cache_path, "index.wtml"), "wt", encoding="utf8") as f:
        f.write(root_text)

    def walk(cur_folder, cur_cache_path):
        for index, child in enumerate(cur_folder.children):
            if not isinstance(child, Folder):
                continue

            subdir_base = f"{index:03d}_{_sanitize_name(child.name)}"
            child_cache_path = os.path.join(cur_cache_path, subdir_base)

            if not len(child.children) and child.url:
                text, child = get_folder(child.url)
                if child is None:
                    continue  # this URL was already fetched elsewhere

                os.makedirs(child_cache_path, exist_ok=True)
                with open(
                    os.path.join(child_cache_path, "index.wtml"), "wt", encoding="utf8"
                ) as f:
                    f.write(text)

            walk(child, child_cache_path)

    walk(root_folder, root_cache_path)
def walk_cached_folder_tree(root_cache_path):
    """Generate the items of a folder tree stored in a directory cache.

    Parameters
    ----------
    root_cache_path : string, path
        The directory holding the ``index.wtml`` file of the root folder.

    Returns
    -------
    A generator of tuples of ``(treepath, item)``.

    Notes
    -----
    ``treepath`` is a tuple of child indices leading from the root folder to
    ``item``; the root folder itself is generated first, with an empty tuple.
    A folder is generated before its contents.

    Each child folder is associated with a subdirectory named after its
    zero-padded child index and a sanitized form of its name. A child folder
    that has no children but does have a URL is replaced by the folder loaded
    from ``index.wtml`` in that subdirectory. If the same URL has already been
    encountered, that child is skipped and is not generated at all.

    """
    visited_urls = set()

    def descend(treepath, folder, cache_dir):
        yield (treepath, folder)

        for position, item in enumerate(folder.children):
            item_treepath = treepath + (position,)

            if not isinstance(item, Folder):
                yield (item_treepath, item)
                continue

            item_cache_dir = os.path.join(
                cache_dir, f"{position:03d}_{_sanitize_name(item.name)}"
            )

            if not len(item.children) and item.url:
                if item.url in visited_urls:
                    continue

                visited_urls.add(item.url)
                item = Folder.from_file(os.path.join(item_cache_dir, "index.wtml"))

            yield from descend(item_treepath, item, item_cache_dir)

    root = Folder.from_file(os.path.join(root_cache_path, "index.wtml"))
    yield from descend((), root, root_cache_path)