{"payload":{"pageCount":3,"repositories":[{"type":"Public","name":"tika-dockers","owner":"USCDataScience","isFork":false,"description":"A suite of Machine Learning / Deep Learning Dockerfiles to allow Apache Tika to extract objects and to produce textual captions for images and video","allTopics":["computer-vision","deep-learning","apache","apache-tika","computer-vision-tools","tika-python","docker","video","tensorflow","detection","tika","image-captioning","usc","usc-data-science"],"primaryLanguage":null,"pullRequestCount":2,"issueCount":1,"starsCount":20,"forksCount":6,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-04-04T14:41:28.502Z"}},{"type":"Public","name":"uscdatascience.github.io","owner":"USCDataScience","isFork":false,"description":"USC Information Retrieval and Data Science Group","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":9,"forksCount":25,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2024-02-16T20:59:26.042Z"}},{"type":"Public","name":"SentimentAnalysisParser","owner":"USCDataScience","isFork":false,"description":"Combines Apache OpenNLP and Apache Tika and provides facilities for automatically deriving sentiment from text.","allTopics":[],"primaryLanguage":null,"pullRequestCount":1,"issueCount":2,"starsCount":32,"forksCount":9,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-05-03T14:20:01.457Z"}},{"type":"Public","name":"sparkler","owner":"USCDataScience","isFork":false,"description":"Spark-Crawler: Apache Nutch-like crawler that runs on Apache Spark.","allTopics":["search","search-engine","distributed-systems","information-retrieval","big-data","spark","solr","web-crawler","nutch","tika"],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":22,"issueCount":33,"starsCount":410,"forksCount":142,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-03-30T22:48:03.328Z"}},{"type":"Public","name":"polar.usc.edu","owner":"USCDataScience","isFork":false,"description":"Polar USC activities related to NSF Polar CyberInfrastructure program at the University of Southern California","allTopics":["science","polar","usc","trec","csci572","trec-dd-polar","d3-visualization"],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":15,"forksCount":35,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-01-15T23:43:50.315Z"}},{"type":"Public","name":"polar-deep-insights","owner":"USCDataScience","isFork":false,"description":"Conceptual - Temporal - Spatial analysis of the trec polar dataset","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":33,"issueCount":0,"starsCount":10,"forksCount":8,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2023-01-04T12:55:13.444Z"}},{"type":"Public","name":"NLTKRest","owner":"USCDataScience","isFork":false,"description":"This is a REST Server endpoint built using Flask and Python. ","allTopics":[],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":2,"issueCount":1,"starsCount":23,"forksCount":14,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-11-16T09:28:09.353Z"}},{"type":"Public","name":"AgePredictor","owner":"USCDataScience","isFork":false,"description":"Age classification from text using PAN16, blogs, Fisher Callhome, and Cancer Forum","allTopics":["nlp","machine-learning","machine-learning-algorithms","datascience","irds","age","usc","age-classifier"],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":6,"issueCount":6,"starsCount":15,"forksCount":11,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2022-07-01T20:37:54.914Z"}},{"type":"Public","name":"autoextractor","owner":"USCDataScience","isFork":true,"description":"A toolkit for clustering web pages based on various similarity measures.","allTopics":[],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":0,"issueCount":3,"starsCount":33,"forksCount":13,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-10-27T17:27:11.547Z"}},{"type":"Public","name":"parser-indexer-py","owner":"USCDataScience","isFork":false,"description":"Python tools for parsing documents and building the inverted index with enriched metadata. Java version with slightly different features - https://github.com/USCDataScience/parser-indexer","allTopics":[],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":0,"issueCount":6,"starsCount":9,"forksCount":3,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-09-02T23:10:56.079Z"}},{"type":"Public","name":"supervising-ui","owner":"USCDataScience","isFork":false,"description":"Web UI for labelling dataset for supervised learning.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":2,"issueCount":1,"starsCount":77,"forksCount":24,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-06-07T03:00:55.340Z"}},{"type":"Public","name":"ufo.usc.edu","owner":"USCDataScience","isFork":false,"description":"Collection of projects from IRDS students studying unidentified flying objects","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":10,"issueCount":0,"starsCount":6,"forksCount":26,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-05-01T17:05:49.424Z"}},{"type":"Public","name":"sparkler-ui","owner":"USCDataScience","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":1,"issueCount":4,"starsCount":1,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2021-01-05T23:39:57.969Z"}},{"type":"Public","name":"Image-Similarity-Deep-Ranking","owner":"USCDataScience","isFork":false,"description":"Deep Ranking based ImageSimilarity will be developed as plugin on ImageSpace. https://users.eecs.northwestern.edu/~jwa368/pdfs/deep_ranking.pdf","allTopics":["python","pytorch","image-similarity","deep-ranking"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":1,"starsCount":36,"forksCount":10,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-05-28T02:52:00.109Z"}},{"type":"Public","name":"cmu-fg-bg-similarity","owner":"USCDataScience","isFork":false,"description":"CMU Foreground/Background Similarity Server from DARPA MEMEX","allTopics":["image-processing","image-recognition","cnn-for-visual-recognition","image-similarity","scalable-lsh","pool5","background-similarity","foreground-similarity","image-space","lsh","memex"],"primaryLanguage":{"name":"C++","color":"#f34b7d"},"pullRequestCount":0,"issueCount":0,"starsCount":7,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2020-04-20T04:40:24.164Z"}},{"type":"Public","name":"file-content-analyzer","owner":"USCDataScience","isFork":false,"description":"A set of python modules to perform Byte Frequency Analysis, Byte Frequency Correlation, Cross Correlation and FHT analysis on files","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":1,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-12-03T23:36:03.250Z"}},{"type":"Public","name":"pdi-topics","owner":"USCDataScience","isFork":false,"description":"LDA Topic Modeling for Polar Data Insights","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":3,"license":"GNU Lesser General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-10-07T16:47:04.036Z"}},{"type":"Public","name":"DDToolAnalysis","owner":"USCDataScience","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-04-16T23:35:19.556Z"}},{"type":"Public","name":"liresolr","owner":"USCDataScience","isFork":true,"description":"Putting LIRE into Solr - an ongoing project","allTopics":[],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":40,"license":"GNU General Public License v2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2019-03-23T04:13:23.873Z"}},{"type":"Public","name":"sweet-neo4j","owner":"USCDataScience","isFork":true,"description":"A ruby parser using linkeddata and RDF to fetch the JPL Sweet ontology and load it into Neo4J for cool graph queries and examination.","allTopics":[],"primaryLanguage":{"name":"Ruby","color":"#701516"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":3,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-12-16T20:35:09.170Z"}},{"type":"Public","name":"deepsentirank","owner":"USCDataScience","isFork":false,"description":"Deep Learning based Sentiment Ranking for Multimedia","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":5,"forksCount":2,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-12-15T03:52:19.375Z"}},{"type":"Public","name":"PolarPostProcessing","owner":"USCDataScience","isFork":true,"description":"This code gets connected to Solr DB created for Sparkler Crawled Data to do further data extraction, classification, filtering and insights generation using various Machine Learning models. The ML models are capable of using keywords list from user, extract features from URL content, and classify (score) output and update Solr parameter accordin…","allTopics":["models","machine-learning-algorithms","crawling","sparkler"],"primaryLanguage":{"name":"Jupyter Notebook","color":"#DA5B0B"},"pullRequestCount":2,"issueCount":0,"starsCount":3,"forksCount":5,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-12-13T01:00:33.683Z"}},{"type":"Public","name":"tika-dl-models","owner":"USCDataScience","isFork":false,"description":"A place to release saved machine learning models for tika-dl","allTopics":["deep-learning","tensorflow","keras","apache-tika","dl4j","tika-dl"],"primaryLanguage":null,"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":0,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-09-28T11:27:26.064Z"}},{"type":"Public","name":"img2text","owner":"USCDataScience","isFork":false,"description":"Models, and associated helper code for GSOC 2017 project Tensorflow Image to Text in Apache Tika","allTopics":["deep-learning","models","usc","tesnorflow","irds"],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":8,"issueCount":0,"starsCount":7,"forksCount":18,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-08-21T00:05:28.852Z"}},{"type":"Public","name":"sce-domain-discovery","owner":"USCDataScience","isFork":true,"description":"Domain Discovery for the Sparkler Crawl Environment","allTopics":[],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":8,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-07-02T17:16:15.858Z"}},{"type":"Public","name":"PolarDataCollection","owner":"USCDataScience","isFork":true,"description":"Using Google Search API we collect URLs relevant to the Polar Domain for deep insights and intelligent crawling","allTopics":["search","google","extraction","urls","keywords","data-collection","polar"],"primaryLanguage":{"name":"HTML","color":"#e34c26"},"pullRequestCount":0,"issueCount":0,"starsCount":3,"forksCount":2,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-06-28T21:08:43.011Z"}},{"type":"Public","name":"pdftabextract","owner":"USCDataScience","isFork":true,"description":"A set of tools for extracting tables from PDF files helping to do data mining on (OCR-processed) scanned documents.","allTopics":[],"primaryLanguage":{"name":"Python","color":"#3572A5"},"pullRequestCount":0,"issueCount":0,"starsCount":2,"forksCount":367,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2018-01-09T08:01:08.786Z"}},{"type":"Public","name":"Ocean_Observation_FacetView","owner":"USCDataScience","isFork":true,"description":"This is a FacetView setup for ocean observation Crawled Data.","allTopics":[],"primaryLanguage":{"name":"JavaScript","color":"#f1e05a"},"pullRequestCount":0,"issueCount":0,"starsCount":0,"forksCount":1,"license":"Apache License 2.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2017-11-12T04:26:00.019Z"}},{"type":"Public","name":"svm-classifier-memex","owner":"USCDataScience","isFork":false,"description":"","allTopics":[],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":0,"issueCount":0,"starsCount":6,"forksCount":0,"license":null,"participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2017-11-01T06:13:23.673Z"}},{"type":"Public","name":"tika-ner-corenlp","owner":"USCDataScience","isFork":true,"description":"Stanford CoreNLP NER addon for Apache Tika's NamerEntityParser","allTopics":[],"primaryLanguage":{"name":"Java","color":"#b07219"},"pullRequestCount":0,"issueCount":0,"starsCount":1,"forksCount":6,"license":"GNU General Public License v3.0","participation":null,"lastUpdated":{"hasBeenPushedTo":true,"timestamp":"2017-10-26T22:25:50.401Z"}}],"repositoryCount":65,"userInfo":null,"searchable":true,"definitions":[],"typeFilters":[{"id":"all","text":"All"},{"id":"public","text":"Public"},{"id":"source","text":"Sources"},{"id":"fork","text":"Forks"},{"id":"archived","text":"Archived"},{"id":"template","text":"Templates"}],"compactMode":false},"title":"Repositories"}