NIH | National Cancer Institute | NCI Wiki  

WIKI MAINTENANCE NOTICE

Please be advised that NCI Wiki will be undergoing maintenance on Monday, July 22nd, between 1700 ET and 1800 ET, and will be unavailable during this period.
Please ensure all work is saved before that time.

If you have any questions or concerns, please contact the CBIIT Atlassian Management Team.

Error rendering macro 'rw-search'

null

You are viewing an old version of this page. View the current version.

Compare with Current View Page History

« Previous Version 3 Next »

Java Code
/*
 * Copyright: (c) 2004-2009 Mayo Foundation for Medical Education and 
 * Research (MFMER). All rights reserved. MAYO, MAYO CLINIC, and the
 * triple-shield Mayo logo are trademarks and service marks of MFMER.
 *
 * Except as contained in the copyright notice above, or as used to identify 
 * MFMER as the author of this software, the trade names, trademarks, service
 * marks, or product names of the copyright holder shall not be used in
 * advertising, promotion or otherwise in connection with this software without
 * prior written authorization of the copyright holder.
 * 
 * Licensed under the Eclipse Public License, Version 1.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at 
 * 
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 */
package org.LexGrid.LexBIG.example;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import org.LexGrid.LexBIG.DataModel.Collections.AssociatedConceptList;
import org.LexGrid.LexBIG.DataModel.Collections.AssociationList;
import org.LexGrid.LexBIG.DataModel.Collections.LocalNameList;
import org.LexGrid.LexBIG.DataModel.Collections.ResolvedConceptReferenceList;
import org.LexGrid.LexBIG.DataModel.Core.AssociatedConcept;
import org.LexGrid.LexBIG.DataModel.Core.Association;
import org.LexGrid.LexBIG.DataModel.Core.CodingSchemeSummary;
import org.LexGrid.LexBIG.DataModel.Core.CodingSchemeVersionOrTag;
import org.LexGrid.LexBIG.DataModel.Core.ResolvedConceptReference;
import org.LexGrid.LexBIG.DataModel.InterfaceElements.CodingSchemeRendering;
import org.LexGrid.LexBIG.Exceptions.LBException;
import org.LexGrid.LexBIG.Extensions.Generic.LexBIGServiceConvenienceMethods;
import org.LexGrid.LexBIG.Impl.LexBIGServiceImpl;
import org.LexGrid.LexBIG.LexBIGService.CodedNodeGraph;
import org.LexGrid.LexBIG.LexBIGService.CodedNodeSet;
import org.LexGrid.LexBIG.LexBIGService.LexBIGService;
import org.LexGrid.LexBIG.LexBIGService.CodedNodeSet.ActiveOption;
import org.LexGrid.LexBIG.Utility.Constructors;
import org.LexGrid.LexBIG.Utility.Iterators.ResolvedConceptReferencesIterator;
import org.LexGrid.annotations.LgAdminFunction;
import org.LexGrid.codingSchemes.CodingScheme;
import org.LexGrid.commonTypes.Property;
import org.LexGrid.commonTypes.PropertyQualifier;
import org.LexGrid.concepts.Concept;
import org.LexGrid.concepts.Entity;
import org.LexGrid.naming.SupportedAssociation;
import org.LexGrid.naming.SupportedProperty;
import org.LexGrid.naming.SupportedPropertyQualifier;
import org.apache.commons.cli.BasicParser;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;

/**
 * 
 * Requires loading a valid scheme (must have root node named @ pointing to
 * top nodes). Profiles a coding scheme based on unique URN, version,
 * relation and scheme name.
 * 
 * Note: If the URN and version values are unspecified, a list of available
 * coding schemes will be presented for user selection.
 * 
 *     Usage: java org.LexGrid.LexBIG.example.ProfileScheme
 *      -u, --urn &lt;urn&gt; URN uniquely identifying the code system.
 *      -v, --version &lt;versionId&gt; Version identifier.
 * 
 *     Example: java -Xmx1500m -cp lgRuntime.jar
 *     org.LexGrid.LexBIG.example.ProfileScheme
 *       -u 'urn:oid:2.16.840.1.113883.3.26.1.1' -v '05.09e'
 * 
 * @author <A HREF="mailto: scott.bauer@mayo.edu">Scott Bauer</A>
 * @author <A HREF="mailto: johnson.thomas@mayo.edu">Thomas Johnson</A>
 * 
 */
@LgAdminFunction
public class ProfileScheme {
    int combinedDepth, numBranches, numBranchPoints;
    Map<String, Integer> prop2Freq = new HashMap<String, Integer>();
    Map<String, Integer> propQual2Freq = new HashMap<String, Integer>();
    Map<String, Integer> assoc2srcFreq = new HashMap<String, Integer>();
    Map<String, Integer> assoc2tgtFreq = new HashMap<String, Integer>();

    /**
     * Association type defaults to hasSubtype in this constructor
     */
    public ProfileScheme() {
        super();
    }

    /**
     * Command-line entry point. Creates a profiler and hands it the raw
     * arguments; failures are reported through {@code Util} instead of being
     * propagated to the JVM.
     * 
     * @param args
     *            command-line arguments, forwarded unchanged to
     *            {@link #run(String[])}
     */
    public static void main(String[] args) {
        try {
            ProfileScheme profiler = new ProfileScheme();
            profiler.run(args);
        } catch (LBException serviceFailure) {
            // LexBIG failures are shown by message only.
            Util.displayTaggedMessage("Operation failed: " + serviceFailure.getMessage());
        } catch (Exception unexpected) {
            // Anything else is displayed and logged together with the exception.
            Util.displayAndLogError("Operation failed", unexpected);
        }
    }

    /**
     * Parses the command line, locates the requested coding scheme and runs
     * the profiling report.
     * <p>
     * When both a URN and a version are supplied, the registered coding schemes
     * are searched for a case-insensitive match on both values. If nothing
     * matches, or either value is missing, the user is prompted to select a
     * scheme; declining the prompt ends the run without a report.
     * 
     * @param args
     *            command-line arguments ({@code -u} URN, {@code -v} version)
     * @throws Exception
     *             propagated from the service lookup or from report generation
     */
    public void run(String[] args) throws Exception {
        // Obtain the service first; everything below depends on it.
        LexBIGService lbs = LexBIGServiceImpl.defaultInstance();

        // Parse the command line; on bad input show usage help and stop.
        Options options = getCommandOptions();
        CommandLine cl;
        try {
            cl = new BasicParser().parse(options, args);
        } catch (ParseException e) {
            Util.displayCommandOptions("ProfileScheme", options,
                    "ProfileScheme -u \"urn:oid:2.16.840.1.113883.3.26.1.1\" " + "-v \"05.09e\"", e);
            Util.displayMessage(Util.promptForSchemeHelp());
            return;
        }

        String urn = cl.getOptionValue("u");
        String ver = cl.getOptionValue("v");
        boolean bothGiven = urn != null && ver != null;
        boolean eitherGiven = urn != null || ver != null;

        // Look for an exact (case-insensitive) URN + version match among the
        // registered vocabularies; stop at the first hit.
        CodingSchemeSummary css = null;
        if (bothGiven) {
            String wantedUrn = urn.trim();
            String wantedVer = ver.trim();
            Enumeration<CodingSchemeRendering> schemes = lbs.getSupportedCodingSchemes()
                    .enumerateCodingSchemeRendering();
            while (schemes.hasMoreElements()) {
                CodingSchemeSummary candidate = schemes.nextElement().getCodingSchemeSummary();
                if (wantedUrn.equalsIgnoreCase(candidate.getCodingSchemeURI())
                        && wantedVer.equalsIgnoreCase(candidate.getRepresentsVersion())) {
                    css = candidate;
                    break;
                }
            }
        }

        // No match: explain (if the user tried to specify one) and prompt.
        if (css == null) {
            if (eitherGiven) {
                Util.displayMessage("No matching coding scheme was found for the given URN or version.");
                Util.displayMessage("");
            }
            css = Util.promptForCodeSystem();
            if (css == null) {
                return;
            }
        }

        runReport(lbs, css);
    }

    /**
     * Profiles the concepts, associations and hierarchies of a code system and
     * displays a report for the caller.
     * <p>
     * The report has three parts: the concept count with per-property and
     * per-qualifier usage, the source/target counts per association, and depth
     * statistics for each supported hierarchy gathered by walking every root.
     * 
     * @param lbs
     *            service used to resolve the coding scheme content
     * @param css
     *            summary supplying the local name and version of the coding
     *            scheme to profile
     * @throws LBException
     *             if a LexBIG service call fails
     */
    protected void runReport(LexBIGService lbs, CodingSchemeSummary css) throws LBException {
        LexBIGServiceConvenienceMethods lbscm = (LexBIGServiceConvenienceMethods) lbs
                .getGenericExtension("LexBIGServiceConvenienceMethods");

        Util.displayMessage(SEPARATOR);
        Util.displayMessage(SEPARATOR);
        Util.displayMessage("NOTE: This may take some time depending on the vocabulary size.");
        Util.displayMessage("");

        // Name and version identify the coding scheme in every call below.
        String schemeName = css.getLocalName();
        CodingSchemeVersionOrTag csVorT = Constructors.createCodingSchemeVersionOrTagFromVersion(css
                .getRepresentsVersion());

        // --- Concepts: count plus property / qualifier usage ---
        long begin = System.currentTimeMillis();
        displaySectionHeader("Profiling Concepts ...");
        int conceptCount = profileConcepts(lbs, schemeName, csVorT);
        Util.displayMessage("");
        Util.displayMessage("Number of concepts: " + conceptCount);
        displayFrequencies("Number of concepts assigned the following properties:", prop2Freq);
        displayFrequencies("Number of concepts assigned the following property qualifiers:", propQual2Freq);
        Util.displayMessage("Time elapsed (ms): " + (System.currentTimeMillis() - begin) + "\n");

        // --- Associations: source and target counts per association ---
        begin = System.currentTimeMillis();
        displaySectionHeader("Profiling Associations ...");
        profileAssociations(lbs, lbscm, schemeName, csVorT);
        Set<String> sortedNames = new TreeSet<String>(assoc2srcFreq.keySet());
        if (!sortedNames.isEmpty()) {
            Util.displayMessage("");
            for (String assocName : sortedNames) {
                Util.displayMessage("Name: '" + assocName + "' fwd: '"
                        + lbscm.getAssociationForwardName(assocName, schemeName, csVorT) + "' rev: '"
                        + lbscm.getAssociationReverseName(assocName, schemeName, csVorT) + '\'');
                // Keep the boxed values: unboxing a missing entry would throw.
                Integer srcFreq = assoc2srcFreq.get(assocName);
                Integer tgtFreq = assoc2tgtFreq.get(assocName);
                if (srcFreq != null && tgtFreq != null) {
                    Util.displayMessage("  # sources: " + srcFreq + "  targets: " + tgtFreq);
                }
            }
        }
        Util.displayMessage("Time elapsed (ms): " + (System.currentTimeMillis() - begin) + "\n");

        // --- Hierarchies: walk from each root and collect depth statistics ---
        String[] hIDs = lbscm.getHierarchyIDs(schemeName, csVorT);
        for (String hID : hIDs) {
            // The counters are instance state updated by profileHierarchy;
            // reset them so each hierarchy is reported on its own.
            combinedDepth = 0;
            numBranches = 0;
            numBranchPoints = 0;
            List<ResolvedConceptReference> deepestPath = new ArrayList<ResolvedConceptReference>();
            displaySectionHeader("Profiling Hierarchy: " + hID);
            ResolvedConceptReferenceList refList = lbscm.getHierarchyRoots(schemeName, csVorT, hID);
            ResolvedConceptReference[] rcref = refList.getResolvedConceptReference();

            begin = System.currentTimeMillis();
            for (ResolvedConceptReference rcr : rcref) {
                // The description is optional (printPath guards it as well).
                StringBuilder rootLabel = new StringBuilder("Checking Root: ").append(rcr.getConceptCode());
                if (rcr.getEntityDescription() != null) {
                    rootLabel.append(':').append(rcr.getEntityDescription().getContent());
                }
                Util.displayMessage(rootLabel.toString());

                List<ResolvedConceptReference> currentPath = profileHierarchy(lbs, lbscm, schemeName, csVorT, hID, rcr,
                        0);
                if (!currentPath.isEmpty()) {
                    Util.displayMessage("");
                    Util.displayMessage("Example of Deepest Branch: ");
                    printPath(currentPath);
                }
                if (currentPath.size() > deepestPath.size()) {
                    deepestPath = currentPath;
                }
                Util.displayMessage("");
            }

            // A hierarchy without roots yields no branches; avoid dividing by zero.
            int averageDepth = numBranches > 0 ? combinedDepth / numBranches : 0;
            Util.displayMessage("Number of Leaf Nodes ......................... : " + numBranches);
            Util.displayMessage("Number of Intermediate Nodes ................. : " + numBranchPoints);
            Util.displayMessage("Average Branch Depth for Hierarchy (all roots) : " + averageDepth);
            Util.displayMessage("Deepest Branch Level for Hierarchy (all roots) : " + deepestPath.size());
            Util.displayMessage("Time elapsed (ms): " + (System.currentTimeMillis() - begin));
            Util.displayMessage("");
        }
    }

    /** Separator line framing each section of the report. */
    private static final String SEPARATOR = "==============================================================================";

    /** Displays a section title framed above and below by separator lines. */
    private void displaySectionHeader(String title) {
        Util.displayMessage(SEPARATOR);
        Util.displayMessage(title);
        Util.displayMessage(SEPARATOR);
    }

    /**
     * Displays the heading followed by one "name: count" line per entry, in
     * sorted name order; displays nothing when the map is empty.
     */
    private void displayFrequencies(String heading, Map<String, Integer> freqByName) {
        Set<String> sortedNames = new TreeSet<String>(freqByName.keySet());
        if (sortedNames.isEmpty()) {
            return;
        }
        Util.displayMessage(heading);
        for (String name : sortedNames) {
            Util.displayMessage("  " + name + ": " + freqByName.get(name));
        }
    }

    /**
     * Enumerates the active concepts of a coding scheme and records, for each
     * property name and property qualifier name, the number of concepts that
     * carry it at least once. Results accumulate in {@code prop2Freq} and
     * {@code propQual2Freq}.
     * 
     * @param lbs
     *            - service object
     * @param schemeName
     *            - coding scheme name
     * @param csVorT
     *            - coding scheme version or tag (version in this case)
     * @return the number of enumerated concepts
     * @throws LBException
     *             if the scheme cannot be resolved or enumerated
     */
    protected int profileConcepts(LexBIGService lbs, String schemeName, CodingSchemeVersionOrTag csVorT)
            throws LBException {
        // Seed the frequency of every supported property and qualifier with
        // zero so that registered-but-unused names still show up in the report.
        CodingScheme scheme = lbs.resolveCodingScheme(schemeName, csVorT);
        for (SupportedProperty supported : scheme.getMappings().getSupportedProperty()) {
            prop2Freq.put(supported.getLocalId(), 0);
        }
        for (SupportedPropertyQualifier supported : scheme.getMappings().getSupportedPropertyQualifier()) {
            propQual2Freq.put(supported.getLocalId(), 0);
        }

        int count = 0;
        CodedNodeSet cns = lbs.getCodingSchemeConcepts(schemeName, csVorT);
        // Use the set returned by the restriction rather than relying on the
        // receiver having been modified in place.
        cns = cns.restrictToStatus(ActiveOption.ACTIVE_ONLY, null);

        ResolvedConceptReferencesIterator rcrIt = cns.resolve(null, null, null, null, true);
        try {
            while (rcrIt.hasNext()) {
                count++;
                ResolvedConceptReference rcr = rcrIt.next();

                // Combine every kind of embedded property into one list.
                Entity node = rcr.getEntity();
                List<Property> properties = new ArrayList<Property>();
                properties.addAll(Arrays.asList(node.getComment()));
                properties.addAll(Arrays.asList(node.getProperty()));
                properties.addAll(Arrays.asList(node.getDefinition()));
                properties.addAll(Arrays.asList(node.getPresentation()));

                // A property or qualifier name is counted once per concept,
                // no matter how often it occurs on that concept.
                Set<String> propsAlreadyProcessed = new HashSet<String>();
                Set<String> qualsAlreadyProcessed = new HashSet<String>();

                for (Property prop : properties) {
                    String propName = prop.getPropertyName();
                    // Set.add returns true only for the first occurrence.
                    if (propsAlreadyProcessed.add(propName)) {
                        Integer propCount = prop2Freq.get(propName);
                        prop2Freq.put(propName, propCount == null ? 1 : propCount + 1);
                    }
                    for (PropertyQualifier qual : prop.getPropertyQualifier()) {
                        String qualName = qual.getPropertyQualifierName();
                        if (qualsAlreadyProcessed.add(qualName)) {
                            Integer qualCount = propQual2Freq.get(qualName);
                            propQual2Freq.put(qualName, qualCount == null ? 1 : qualCount + 1);
                        }
                    }
                }

                // This step can take a while: print a dot every 100 concepts
                // and start a new line every 78 dots.
                if (count % 100 == 0) {
                    System.out.print('.');
                }
                if (count % 7800 == 0) {
                    System.out.println();
                }
            }
        } finally {
            // Free any resources held by the iterator, even on failure.
            rcrIt.release();
        }
        return count;
    }

    /**
     * Counts, for every association, how often it occurs with a concept as
     * source and how many target concepts it reaches in total. Results
     * accumulate in {@code assoc2srcFreq} and {@code assoc2tgtFreq}.
     * <p>
     * Profiling is best effort: a failure while walking the concepts is
     * displayed and the counts gathered so far are kept.
     * 
     * @param lbs
     *            - service context
     * @param lbscm
     *            - convenience methods (not used by this implementation)
     * @param schemeName
     *            - coding scheme name
     * @param csVorT
     *            - coding scheme version or tag (version in this case)
     * @throws LBException
     *             if the coding scheme cannot be resolved
     */
    protected void profileAssociations(LexBIGService lbs, LexBIGServiceConvenienceMethods lbscm, String schemeName,
            CodingSchemeVersionOrTag csVorT) throws LBException {
        // Seed the frequency of every supported association with zero so that
        // registered-but-unused associations still show up in the report.
        CodingScheme scheme = lbs.resolveCodingScheme(schemeName, csVorT);
        for (SupportedAssociation supported : scheme.getMappings().getSupportedAssociation()) {
            assoc2srcFreq.put(supported.getLocalId(), 0);
            assoc2tgtFreq.put(supported.getLocalId(), 0);
        }

        int count = 0;
        try {
            CodedNodeSet allCodes = lbs.getCodingSchemeConcepts(schemeName, csVorT);
            // NOTE(review): "--no-property--" presumably names a property that
            // never matches, so that no property data is resolved - confirm.
            LocalNameList noProps = Constructors.createLocalNameList("--no-property--");
            ResolvedConceptReferencesIterator potentialSources = allCodes.resolve(null, noProps, null);

            try {
                // Brute force: evaluate each possible source concept by concept.
                // Time intensive, but scales better and helps avoid bottlenecks
                // related to exceeding system-defined search limits.
                while (potentialSources.hasNext()) {
                    ResolvedConceptReference potentialSource = potentialSources.next();
                    CodedNodeGraph cng = lbs.getNodeGraph(schemeName, csVorT, null);
                    ResolvedConceptReferenceList rcrList = cng.resolveAsList(potentialSource, true, false, -1, 1,
                            noProps, null, null, null, -1);
                    for (ResolvedConceptReference rcr : rcrList.getResolvedConceptReference()) {
                        // A concept that is not the source of any association
                        // may carry no list at all; skip it instead of failing.
                        AssociationList assocList = rcr.getSourceOf();
                        if (assocList != null) {
                            for (Association assoc : assocList.getAssociation()) {
                                // Bump source count ...
                                String assocName = assoc.getAssociationName();
                                Integer freq = assoc2srcFreq.get(assocName);
                                assoc2srcFreq.put(assocName, (freq == null ? 1 : freq + 1));

                                // Bump target count ...
                                freq = assoc2tgtFreq.get(assocName);
                                AssociatedConceptList targets = assoc.getAssociatedConcepts();
                                int targetCount = (targets == null ? 0 : targets.getAssociatedConceptCount());
                                assoc2tgtFreq.put(assocName, (freq == null ? targetCount : freq + targetCount));
                            }
                        }

                        // Same feedback cadence as profileConcepts: a dot every
                        // 100 items and a new line every 78 dots.
                        count++;
                        if (count % 100 == 0) {
                            System.out.print('.');
                        }
                        if (count % 7800 == 0) {
                            System.out.println();
                        }
                    }
                }
            } finally {
                // Free any resources held by the iterator, even on failure.
                potentialSources.release();
            }
        } catch (Exception e) {
            // Deliberately best effort; fall back to the exception's string
            // form so that a message-less failure is not displayed as "null".
            String detail = e.getMessage();
            Util.displayMessage(detail != null ? detail : e.toString());
        }
    }

    /**
     * Walks the hierarchy below a concept depth-first, updating the cumulative
     * counters ({@code numBranches}, {@code combinedDepth},
     * {@code numBranchPoints}) and returning one example of the deepest path
     * found.
     * 
     * @param lbs
     *            - service context
     * @param lbscm
     *            - convenience methods
     * @param schemeName
     *            - coding scheme name
     * @param csVorT
     *            - coding scheme version or tag (version in this case)
     * @param hID
     *            - hierarchy ID
     * @param root
     *            - concept to navigate from
     * @param level
     *            - current level of recursion
     * @return references representing deepest path in branch (root first);
     *         only the root when it has no next level
     * @throws LBException
     */
    protected List<ResolvedConceptReference> profileHierarchy(LexBIGService lbs, LexBIGServiceConvenienceMethods lbscm,
            String schemeName, CodingSchemeVersionOrTag csVorT, String hID, ResolvedConceptReference root, int level)
            throws LBException {
        List<ResolvedConceptReference> path = new ArrayList<ResolvedConceptReference>();
        path.add(root);

        AssociationList nextLevel = lbscm.getHierarchyLevelNext(schemeName, csVorT, hID, root.getConceptCode(), false,
                null);

        // Leaf: a branch is complete, so record it and its depth, then stop.
        if (nextLevel.getAssociationCount() <= 0) {
            numBranches++;
            combinedDepth += (level + 1);
            return path;
        }

        List<ResolvedConceptReference> longestBelow = new ArrayList<ResolvedConceptReference>();
        for (Association assoc : nextLevel.getAssociation()) {
            AssociatedConceptList targets = assoc.getAssociatedConcepts();

            // More than one target means the tree splits here. Count the
            // split and, because the recursion can run long, print a dot
            // every 10 splits and a new line every 78 dots.
            if (targets.getAssociatedConceptCount() > 1) {
                numBranchPoints++;
                if (numBranchPoints % 10 == 0) {
                    System.out.print('.');
                }
                if (numBranchPoints % 780 == 0) {
                    System.out.println();
                }
            }

            // Descend into every child, keeping the longest sub-path seen.
            Iterator<AssociatedConcept> children = targets.iterateAssociatedConcept();
            while (children.hasNext()) {
                AssociatedConcept child = children.next();
                List<ResolvedConceptReference> candidate = profileHierarchy(lbs, lbscm, schemeName, csVorT, hID,
                        child, level + 1);
                if (candidate.size() > longestBelow.size()) {
                    longestBelow = candidate;
                }
            }
        }

        // Root followed by the deepest sub-path found beneath it.
        path.addAll(longestBelow);
        return path;
    }

    /**
     * Displays one line per concept in the path: the concept code, followed by
     * a colon and the description when one is present. Each line is indented
     * two spaces further than the previous one.
     * 
     * @param path
     *            Chain of concept references, root first.
     */
    protected void printPath(List<ResolvedConceptReference> path) {
        // Grows by one indent step per entry; the first entry gets one step.
        StringBuilder indent = new StringBuilder();
        for (ResolvedConceptReference ref : path) {
            indent.append("  ");
            StringBuilder line = new StringBuilder(256);
            line.append(indent);
            line.append(ref.getConceptCode());
            if (ref.getEntityDescription() != null) {
                line.append(':').append(ref.getEntityDescription().getContent());
            }
            Util.displayMessage(line.toString());
        }
    }

    /**
     * Builds the supported command options: {@code -u/--urn} and
     * {@code -v/--version}. Both take an argument and neither is required.
     * 
     * @return org.apache.commons.cli.Options
     */
    private Options getCommandOptions() {
        Option urnOption = new Option("u", "urn", true, "URN uniquely identifying the code system.");
        urnOption.setArgName("name");
        urnOption.setRequired(false);

        Option versionOption = new Option("v", "version", true, "Version identifier.");
        versionOption.setArgName("id");
        versionOption.setRequired(false);

        Options options = new Options();
        options.addOption(urnOption);
        options.addOption(versionOption);
        return options;
    }

}
  • No labels